summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/Kconfig16
-rw-r--r--fs/9p/Makefile2
-rw-r--r--fs/9p/acl.c395
-rw-r--r--fs/9p/acl.h49
-rw-r--r--fs/9p/fid.c1
-rw-r--r--fs/9p/v9fs.c22
-rw-r--r--fs/9p/v9fs.h50
-rw-r--r--fs/9p/v9fs_vfs.h4
-rw-r--r--fs/9p/vfs_addr.c30
-rw-r--r--fs/9p/vfs_dentry.c4
-rw-r--r--fs/9p/vfs_dir.c4
-rw-r--r--fs/9p/vfs_file.c265
-rw-r--r--fs/9p/vfs_inode.c779
-rw-r--r--fs/9p/vfs_inode_dotl.c824
-rw-r--r--fs/9p/vfs_super.c36
-rw-r--r--fs/9p/xattr.c54
-rw-r--r--fs/9p/xattr.h6
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Makefile2
-rw-r--r--fs/adfs/dir.c13
-rw-r--r--fs/adfs/super.c20
-rw-r--r--fs/affs/amigaffs.c4
-rw-r--r--fs/affs/namei.c68
-rw-r--r--fs/affs/super.c20
-rw-r--r--fs/afs/dir.c10
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/security.c7
-rw-r--r--fs/afs/super.c29
-rw-r--r--fs/aio.c29
-rw-r--r--fs/anon_inodes.c22
-rw-r--r--fs/autofs/Kconfig22
-rw-r--r--fs/autofs/Makefile7
-rw-r--r--fs/autofs/autofs_i.h165
-rw-r--r--fs/autofs/dirhash.c250
-rw-r--r--fs/autofs/init.c52
-rw-r--r--fs/autofs/inode.c288
-rw-r--r--fs/autofs/root.c645
-rw-r--r--fs/autofs/symlink.c26
-rw-r--r--fs/autofs/waitq.c205
-rw-r--r--fs/autofs4/autofs_i.h21
-rw-r--r--fs/autofs4/expire.c141
-rw-r--r--fs/autofs4/init.c8
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/autofs4/root.c103
-rw-r--r--fs/autofs4/waitq.c23
-rw-r--r--fs/bad_inode.c5
-rw-r--r--fs/befs/endian.h16
-rw-r--r--fs/befs/linuxvfs.c23
-rw-r--r--fs/bfs/inode.c17
-rw-r--r--fs/binfmt_elf.c23
-rw-r--r--fs/binfmt_misc.c8
-rw-r--r--fs/bio.c23
-rw-r--r--fs/block_dev.c18
-rw-r--r--fs/btrfs/acl.c21
-rw-r--r--fs/btrfs/compression.c17
-rw-r--r--fs/btrfs/ctree.c57
-rw-r--r--fs/btrfs/ctree.h102
-rw-r--r--fs/btrfs/dir-item.c2
-rw-r--r--fs/btrfs/disk-io.c73
-rw-r--r--fs/btrfs/export.c82
-rw-r--r--fs/btrfs/extent-tree.c749
-rw-r--r--fs/btrfs/extent_io.c245
-rw-r--r--fs/btrfs/extent_io.h7
-rw-r--r--fs/btrfs/extent_map.c4
-rw-r--r--fs/btrfs/file.c99
-rw-r--r--fs/btrfs/free-space-cache.c753
-rw-r--r--fs/btrfs/free-space-cache.h18
-rw-r--r--fs/btrfs/inode.c518
-rw-r--r--fs/btrfs/ioctl.c441
-rw-r--r--fs/btrfs/ioctl.h17
-rw-r--r--fs/btrfs/ordered-data.c69
-rw-r--r--fs/btrfs/ordered-data.h3
-rw-r--r--fs/btrfs/orphan.c6
-rw-r--r--fs/btrfs/relocation.c109
-rw-r--r--fs/btrfs/root-tree.c2
-rw-r--r--fs/btrfs/super.c100
-rw-r--r--fs/btrfs/transaction.c239
-rw-r--r--fs/btrfs/transaction.h8
-rw-r--r--fs/btrfs/tree-defrag.c2
-rw-r--r--fs/btrfs/tree-log.c38
-rw-r--r--fs/btrfs/volumes.c27
-rw-r--r--fs/btrfs/volumes.h2
-rw-r--r--fs/btrfs/xattr.c2
-rw-r--r--fs/btrfs/zlib.c5
-rw-r--r--fs/buffer.c37
-rw-r--r--fs/ceph/Makefile23
-rw-r--r--fs/ceph/addr.c6
-rw-r--r--fs/ceph/caps.c17
-rw-r--r--fs/ceph/debugfs.c9
-rw-r--r--fs/ceph/dir.c71
-rw-r--r--fs/ceph/export.c2
-rw-r--r--fs/ceph/file.c65
-rw-r--r--fs/ceph/inode.c92
-rw-r--r--fs/ceph/ioctl.h2
-rw-r--r--fs/ceph/locks.c94
-rw-r--r--fs/ceph/mds_client.c97
-rw-r--r--fs/ceph/mds_client.h35
-rw-r--r--fs/ceph/super.c63
-rw-r--r--fs/ceph/super.h8
-rw-r--r--fs/char_dev.c13
-rw-r--r--fs/cifs/Kconfig11
-rw-r--r--fs/cifs/Makefile4
-rw-r--r--fs/cifs/README9
-rw-r--r--fs/cifs/TODO2
-rw-r--r--fs/cifs/cache.c16
-rw-r--r--fs/cifs/cifs_debug.c22
-rw-r--r--fs/cifs/cifs_fs_sb.h7
-rw-r--r--fs/cifs/cifs_spnego.c10
-rw-r--r--fs/cifs/cifsacl.c51
-rw-r--r--fs/cifs/cifsacl.h4
-rw-r--r--fs/cifs/cifsencrypt.c433
-rw-r--r--fs/cifs/cifsfs.c59
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h79
-rw-r--r--fs/cifs/cifspdu.h13
-rw-r--r--fs/cifs/cifsproto.h26
-rw-r--r--fs/cifs/cifssmb.c192
-rw-r--r--fs/cifs/connect.c751
-rw-r--r--fs/cifs/dir.c83
-rw-r--r--fs/cifs/dns_resolve.c2
-rw-r--r--fs/cifs/file.c364
-rw-r--r--fs/cifs/fscache.c12
-rw-r--r--fs/cifs/inode.c93
-rw-r--r--fs/cifs/ioctl.c16
-rw-r--r--fs/cifs/link.c4
-rw-r--r--fs/cifs/misc.c27
-rw-r--r--fs/cifs/readdir.c48
-rw-r--r--fs/cifs/sess.c297
-rw-r--r--fs/cifs/transport.c8
-rw-r--r--fs/cifs/xattr.c55
-rw-r--r--fs/coda/cache.c4
-rw-r--r--fs/coda/dir.c20
-rw-r--r--fs/coda/inode.c19
-rw-r--r--fs/coda/pioctl.c6
-rw-r--r--fs/compat.c72
-rw-r--r--fs/compat_ioctl.c33
-rw-r--r--fs/configfs/configfs_internal.h4
-rw-r--r--fs/configfs/dir.c24
-rw-r--r--fs/configfs/inode.c8
-rw-r--r--fs/configfs/mount.c8
-rw-r--r--fs/cramfs/inode.c119
-rw-r--r--fs/dcache.c1375
-rw-r--r--fs/debugfs/inode.c8
-rw-r--r--fs/devpts/inode.c32
-rw-r--r--fs/dlm/lowcomms.c63
-rw-r--r--fs/ecryptfs/dentry.c9
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h1
-rw-r--r--fs/ecryptfs/inode.c23
-rw-r--r--fs/ecryptfs/keystore.c45
-rw-r--r--fs/ecryptfs/main.c24
-rw-r--r--fs/ecryptfs/super.c15
-rw-r--r--fs/efs/super.c17
-rw-r--r--fs/eventpoll.c20
-rw-r--r--fs/exec.c41
-rw-r--r--fs/exofs/super.c19
-rw-r--r--fs/exportfs/expfs.c14
-rw-r--r--fs/ext2/acl.c11
-rw-r--r--fs/ext2/acl.h2
-rw-r--r--fs/ext2/dir.c19
-rw-r--r--fs/ext2/namei.c2
-rw-r--r--fs/ext2/super.c42
-rw-r--r--fs/ext2/xattr.c10
-rw-r--r--fs/ext3/acl.c11
-rw-r--r--fs/ext3/acl.h2
-rw-r--r--fs/ext3/balloc.c266
-rw-r--r--fs/ext3/dir.c15
-rw-r--r--fs/ext3/inode.c6
-rw-r--r--fs/ext3/ioctl.c22
-rw-r--r--fs/ext3/namei.c138
-rw-r--r--fs/ext3/resize.c65
-rw-r--r--fs/ext3/super.c82
-rw-r--r--fs/ext3/xattr.c2
-rw-r--r--fs/ext4/acl.c11
-rw-r--r--fs/ext4/acl.h2
-rw-r--r--fs/ext4/balloc.c3
-rw-r--r--fs/ext4/dir.c56
-rw-r--r--fs/ext4/ext4.h98
-rw-r--r--fs/ext4/ext4_extents.h8
-rw-r--r--fs/ext4/ext4_jbd2.h2
-rw-r--r--fs/ext4/extents.c92
-rw-r--r--fs/ext4/file.c22
-rw-r--r--fs/ext4/fsync.c4
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inode.c103
-rw-r--r--fs/ext4/ioctl.c24
-rw-r--r--fs/ext4/mballoc.c57
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/namei.c71
-rw-r--r--fs/ext4/page-io.c106
-rw-r--r--fs/ext4/resize.c69
-rw-r--r--fs/ext4/super.c434
-rw-r--r--fs/ext4/xattr.c28
-rw-r--r--fs/ext4/xattr.h2
-rw-r--r--fs/fat/inode.c13
-rw-r--r--fs/fat/namei_msdos.c32
-rw-r--r--fs/fat/namei_vfat.c64
-rw-r--r--fs/filesystems.c3
-rw-r--r--fs/freevxfs/vxfs_inode.c9
-rw-r--r--fs/freevxfs/vxfs_super.c9
-rw-r--r--fs/fs-writeback.c68
-rw-r--r--fs/fs_struct.c36
-rw-r--r--fs/fuse/control.c10
-rw-r--r--fs/fuse/dev.c156
-rw-r--r--fs/fuse/dir.c71
-rw-r--r--fs/fuse/file.c140
-rw-r--r--fs/fuse/fuse_i.h27
-rw-r--r--fs/fuse/inode.c60
-rw-r--r--fs/generic_acl.c20
-rw-r--r--fs/gfs2/acl.c5
-rw-r--r--fs/gfs2/acl.h2
-rw-r--r--fs/gfs2/bmap.c11
-rw-r--r--fs/gfs2/dentry.c22
-rw-r--r--fs/gfs2/export.c50
-rw-r--r--fs/gfs2/file.c2
-rw-r--r--fs/gfs2/glock.c92
-rw-r--r--fs/gfs2/glock.h28
-rw-r--r--fs/gfs2/glops.c1
-rw-r--r--fs/gfs2/incore.h13
-rw-r--r--fs/gfs2/inode.c165
-rw-r--r--fs/gfs2/inode.h6
-rw-r--r--fs/gfs2/lock_dlm.c15
-rw-r--r--fs/gfs2/ops_fstype.c53
-rw-r--r--fs/gfs2/ops_inode.c38
-rw-r--r--fs/gfs2/quota.c28
-rw-r--r--fs/gfs2/rgrp.c146
-rw-r--r--fs/gfs2/rgrp.h1
-rw-r--r--fs/gfs2/super.c9
-rw-r--r--fs/gfs2/xattr.c23
-rw-r--r--fs/hfs/dir.c2
-rw-r--r--fs/hfs/hfs_fs.h8
-rw-r--r--fs/hfs/string.c17
-rw-r--r--fs/hfs/super.c20
-rw-r--r--fs/hfs/sysdep.c7
-rw-r--r--fs/hfsplus/bfind.c6
-rw-r--r--fs/hfsplus/bitmap.c3
-rw-r--r--fs/hfsplus/bnode.c70
-rw-r--r--fs/hfsplus/brec.c28
-rw-r--r--fs/hfsplus/btree.c33
-rw-r--r--fs/hfsplus/catalog.c85
-rw-r--r--fs/hfsplus/dir.c43
-rw-r--r--fs/hfsplus/extents.c96
-rw-r--r--fs/hfsplus/hfsplus_fs.h130
-rw-r--r--fs/hfsplus/hfsplus_raw.h3
-rw-r--r--fs/hfsplus/inode.c89
-rw-r--r--fs/hfsplus/ioctl.c8
-rw-r--r--fs/hfsplus/options.c44
-rw-r--r--fs/hfsplus/part_tbl.c129
-rw-r--r--fs/hfsplus/super.c152
-rw-r--r--fs/hfsplus/unicode.c56
-rw-r--r--fs/hfsplus/wrapper.c178
-rw-r--r--fs/hostfs/hostfs_kern.c52
-rw-r--r--fs/hpfs/buffer.c4
-rw-r--r--fs/hpfs/dentry.c27
-rw-r--r--fs/hpfs/hpfs_fn.h2
-rw-r--r--fs/hpfs/namei.c2
-rw-r--r--fs/hpfs/super.c20
-rw-r--r--fs/hppfs/hppfs.c17
-rw-r--r--fs/hugetlbfs/inode.c20
-rw-r--r--fs/inode.c50
-rw-r--r--fs/internal.h3
-rw-r--r--fs/ioctl.c40
-rw-r--r--fs/ioprio.c13
-rw-r--r--fs/isofs/inode.c140
-rw-r--r--fs/isofs/namei.c5
-rw-r--r--fs/jbd/transaction.c2
-rw-r--r--fs/jbd2/journal.c58
-rw-r--r--fs/jbd2/recovery.c2
-rw-r--r--fs/jbd2/transaction.c8
-rw-r--r--fs/jffs2/acl.c5
-rw-r--r--fs/jffs2/acl.h2
-rw-r--r--fs/jffs2/build.c2
-rw-r--r--fs/jffs2/compr.c6
-rw-r--r--fs/jffs2/compr.h4
-rw-r--r--fs/jffs2/compr_lzo.c4
-rw-r--r--fs/jffs2/compr_rtime.c6
-rw-r--r--fs/jffs2/compr_rubin.c11
-rw-r--r--fs/jffs2/compr_zlib.c6
-rw-r--r--fs/jffs2/dir.c3
-rw-r--r--fs/jffs2/erase.c2
-rw-r--r--fs/jffs2/fs.c22
-rw-r--r--fs/jffs2/gc.c7
-rw-r--r--fs/jffs2/jffs2_fs_sb.h1
-rw-r--r--fs/jffs2/nodelist.c8
-rw-r--r--fs/jffs2/nodelist.h3
-rw-r--r--fs/jffs2/scan.c12
-rw-r--r--fs/jffs2/super.c18
-rw-r--r--fs/jfs/acl.c8
-rw-r--r--fs/jfs/jfs_acl.h2
-rw-r--r--fs/jfs/namei.c63
-rw-r--r--fs/jfs/super.c21
-rw-r--r--fs/libfs.c77
-rw-r--r--fs/lockd/Makefile6
-rw-r--r--fs/lockd/clnt4xdr.c605
-rw-r--r--fs/lockd/clntlock.c5
-rw-r--r--fs/lockd/clntproc.c19
-rw-r--r--fs/lockd/clntxdr.c627
-rw-r--r--fs/lockd/host.c416
-rw-r--r--fs/lockd/mon.c110
-rw-r--r--fs/lockd/svc4proc.c21
-rw-r--r--fs/lockd/svclock.c35
-rw-r--r--fs/lockd/svcproc.c29
-rw-r--r--fs/lockd/xdr.c287
-rw-r--r--fs/lockd/xdr4.c255
-rw-r--r--fs/locks.c93
-rw-r--r--fs/logfs/dev_bdev.c15
-rw-r--r--fs/logfs/dev_mtd.c18
-rw-r--r--fs/logfs/dir.c6
-rw-r--r--fs/logfs/inode.c9
-rw-r--r--fs/logfs/journal.c2
-rw-r--r--fs/logfs/logfs.h22
-rw-r--r--fs/logfs/readwrite.c3
-rw-r--r--fs/logfs/super.c77
-rw-r--r--fs/mbcache.c12
-rw-r--r--fs/minix/inode.c18
-rw-r--r--fs/minix/namei.c2
-rw-r--r--fs/namei.c862
-rw-r--r--fs/namespace.c292
-rw-r--r--fs/ncpfs/dir.c89
-rw-r--r--fs/ncpfs/file.c1
-rw-r--r--fs/ncpfs/inode.c28
-rw-r--r--fs/ncpfs/ioctl.c1
-rw-r--r--fs/ncpfs/ncplib_kernel.h16
-rw-r--r--fs/nfs/callback.c84
-rw-r--r--fs/nfs/callback.h59
-rw-r--r--fs/nfs/callback_proc.c326
-rw-r--r--fs/nfs/callback_xdr.c143
-rw-r--r--fs/nfs/client.c302
-rw-r--r--fs/nfs/delegation.c363
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c322
-rw-r--r--fs/nfs/direct.c4
-rw-r--r--fs/nfs/file.c3
-rw-r--r--fs/nfs/getroot.c10
-rw-r--r--fs/nfs/idmap.c4
-rw-r--r--fs/nfs/inode.c13
-rw-r--r--fs/nfs/internal.h28
-rw-r--r--fs/nfs/mount_clnt.c87
-rw-r--r--fs/nfs/namespace.c17
-rw-r--r--fs/nfs/nfs2xdr.c1296
-rw-r--r--fs/nfs/nfs3xdr.c2815
-rw-r--r--fs/nfs/nfs4_fs.h13
-rw-r--r--fs/nfs/nfs4filelayout.c6
-rw-r--r--fs/nfs/nfs4proc.c205
-rw-r--r--fs/nfs/nfs4renewd.c11
-rw-r--r--fs/nfs/nfs4state.c293
-rw-r--r--fs/nfs/nfs4xdr.c1432
-rw-r--r--fs/nfs/pagelist.c19
-rw-r--r--fs/nfs/pnfs.c524
-rw-r--r--fs/nfs/pnfs.h76
-rw-r--r--fs/nfs/proc.c5
-rw-r--r--fs/nfs/read.c1
-rw-r--r--fs/nfs/super.c127
-rw-r--r--fs/nfs/unlink.c8
-rw-r--r--fs/nfs/write.c3
-rw-r--r--fs/nfsd/nfs3xdr.c6
-rw-r--r--fs/nfsd/nfs4callback.c690
-rw-r--r--fs/nfsd/nfs4state.c46
-rw-r--r--fs/nfsd/nfsctl.c8
-rw-r--r--fs/nfsd/vfs.c5
-rw-r--r--fs/nfsd/xdr4.h21
-rw-r--r--fs/nilfs2/bmap.c47
-rw-r--r--fs/nilfs2/btnode.c3
-rw-r--r--fs/nilfs2/dat.c2
-rw-r--r--fs/nilfs2/dir.c3
-rw-r--r--fs/nilfs2/file.c1
-rw-r--r--fs/nilfs2/gcinode.c9
-rw-r--r--fs/nilfs2/ifile.c11
-rw-r--r--fs/nilfs2/inode.c190
-rw-r--r--fs/nilfs2/ioctl.c28
-rw-r--r--fs/nilfs2/mdt.c32
-rw-r--r--fs/nilfs2/namei.c1
-rw-r--r--fs/nilfs2/nilfs.h15
-rw-r--r--fs/nilfs2/page.c86
-rw-r--r--fs/nilfs2/page.h3
-rw-r--r--fs/nilfs2/recovery.c2
-rw-r--r--fs/nilfs2/sb.h8
-rw-r--r--fs/nilfs2/segment.c43
-rw-r--r--fs/nilfs2/super.c58
-rw-r--r--fs/nilfs2/the_nilfs.c6
-rw-r--r--fs/nilfs2/the_nilfs.h3
-rw-r--r--fs/notify/Kconfig2
-rw-r--r--fs/notify/fanotify/Kconfig2
-rw-r--r--fs/notify/fanotify/fanotify.c33
-rw-r--r--fs/notify/fanotify/fanotify_user.c175
-rw-r--r--fs/notify/fsnotify.c43
-rw-r--r--fs/notify/inode_mark.c9
-rw-r--r--fs/notify/inotify/inotify_user.c3
-rw-r--r--fs/notify/vfsmount_mark.c6
-rw-r--r--fs/ntfs/Makefile2
-rw-r--r--fs/ntfs/file.c35
-rw-r--r--fs/ntfs/inode.c9
-rw-r--r--fs/ntfs/super.c15
-rw-r--r--fs/ocfs2/Kconfig2
-rw-r--r--fs/ocfs2/acl.c8
-rw-r--r--fs/ocfs2/acl.h2
-rw-r--r--fs/ocfs2/alloc.c77
-rw-r--r--fs/ocfs2/alloc.h4
-rw-r--r--fs/ocfs2/aops.c66
-rw-r--r--fs/ocfs2/aops.h23
-rw-r--r--fs/ocfs2/cluster/heartbeat.c263
-rw-r--r--fs/ocfs2/cluster/masklog.c3
-rw-r--r--fs/ocfs2/cluster/masklog.h15
-rw-r--r--fs/ocfs2/cluster/netdebug.c286
-rw-r--r--fs/ocfs2/cluster/quorum.c4
-rw-r--r--fs/ocfs2/cluster/tcp.c145
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h33
-rw-r--r--fs/ocfs2/dcache.c21
-rw-r--r--fs/ocfs2/dir.c4
-rw-r--r--fs/ocfs2/dlm/dlmast.c76
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h86
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c200
-rw-r--r--fs/ocfs2/dlm/dlmdebug.h5
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c12
-rw-r--r--fs/ocfs2/dlm/dlmlock.c3
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c40
-rw-r--r--fs/ocfs2/dlm/dlmthread.c132
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c17
-rw-r--r--fs/ocfs2/export.c4
-rw-r--r--fs/ocfs2/file.c22
-rw-r--r--fs/ocfs2/file.h2
-rw-r--r--fs/ocfs2/inode.c2
-rw-r--r--fs/ocfs2/namei.c15
-rw-r--r--fs/ocfs2/ocfs2.h11
-rw-r--r--fs/ocfs2/ocfs2_fs.h2
-rw-r--r--fs/ocfs2/stack_user.c2
-rw-r--r--fs/ocfs2/suballoc.c2
-rw-r--r--fs/ocfs2/super.c21
-rw-r--r--fs/omfs/inode.c9
-rw-r--r--fs/open.c6
-rw-r--r--fs/openpromfs/inode.c17
-rw-r--r--fs/pipe.c43
-rw-r--r--fs/pnode.c4
-rw-r--r--fs/proc/Makefile1
-rw-r--r--fs/proc/array.c28
-rw-r--r--fs/proc/base.c179
-rw-r--r--fs/proc/consoles.c114
-rw-r--r--fs/proc/devices.c4
-rw-r--r--fs/proc/generic.c21
-rw-r--r--fs/proc/inode.c17
-rw-r--r--fs/proc/internal.h5
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--fs/proc/page.c2
-rw-r--r--fs/proc/proc_sysctl.c31
-rw-r--r--fs/proc/proc_tty.c26
-rw-r--r--fs/proc/root.c16
-rw-r--r--fs/proc/softirqs.c6
-rw-r--r--fs/proc/stat.c2
-rw-r--r--fs/proc/task_mmu.c8
-rw-r--r--fs/proc/task_nommu.c7
-rw-r--r--fs/proc/vmcore.c2
-rw-r--r--fs/qnx4/inode.c18
-rw-r--r--fs/quota/dquot.c18
-rw-r--r--fs/quota/quota_tree.c9
-rw-r--r--fs/ramfs/inode.c17
-rw-r--r--fs/read_write.c63
-rw-r--r--fs/reiserfs/inode.c1
-rw-r--r--fs/reiserfs/ioctl.c8
-rw-r--r--fs/reiserfs/journal.c1
-rw-r--r--fs/reiserfs/prints.c4
-rw-r--r--fs/reiserfs/super.c19
-rw-r--r--fs/reiserfs/xattr.c18
-rw-r--r--fs/reiserfs/xattr_acl.c6
-rw-r--r--fs/romfs/super.c26
-rw-r--r--fs/select.c2
-rw-r--r--fs/smbfs/Kconfig56
-rw-r--r--fs/smbfs/Makefile18
-rw-r--r--fs/smbfs/cache.c208
-rw-r--r--fs/smbfs/dir.c696
-rw-r--r--fs/smbfs/file.c454
-rw-r--r--fs/smbfs/getopt.c64
-rw-r--r--fs/smbfs/getopt.h14
-rw-r--r--fs/smbfs/inode.c843
-rw-r--r--fs/smbfs/ioctl.c69
-rw-r--r--fs/smbfs/proc.c3503
-rw-r--r--fs/smbfs/proto.h87
-rw-r--r--fs/smbfs/request.c818
-rw-r--r--fs/smbfs/request.h70
-rw-r--r--fs/smbfs/smb_debug.h34
-rw-r--r--fs/smbfs/smbiod.c344
-rw-r--r--fs/smbfs/sock.c386
-rw-r--r--fs/smbfs/symlink.c68
-rw-r--r--fs/splice.c24
-rw-r--r--fs/squashfs/super.c19
-rw-r--r--fs/squashfs/xattr.c9
-rw-r--r--fs/squashfs/xattr.h4
-rw-r--r--fs/squashfs/xattr_id.c1
-rw-r--r--fs/super.c116
-rw-r--r--fs/sysfs/dir.c10
-rw-r--r--fs/sysfs/group.c10
-rw-r--r--fs/sysfs/inode.c12
-rw-r--r--fs/sysfs/mount.c32
-rw-r--r--fs/sysfs/sysfs.h3
-rw-r--r--fs/sysv/inode.c9
-rw-r--r--fs/sysv/namei.c5
-rw-r--r--fs/sysv/super.c19
-rw-r--r--fs/ubifs/super.c23
-rw-r--r--fs/udf/Kconfig1
-rw-r--r--fs/udf/balloc.c3
-rw-r--r--fs/udf/dir.c5
-rw-r--r--fs/udf/file.c11
-rw-r--r--fs/udf/ialloc.c21
-rw-r--r--fs/udf/inode.c51
-rw-r--r--fs/udf/namei.c107
-rw-r--r--fs/udf/partition.c27
-rw-r--r--fs/udf/super.c85
-rw-r--r--fs/udf/symlink.c12
-rw-r--r--fs/udf/udf_i.h13
-rw-r--r--fs/udf/udf_sb.h22
-rw-r--r--fs/udf/udfdecl.h4
-rw-r--r--fs/ufs/super.c17
-rw-r--r--fs/xfs/linux-2.6/sv.h59
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c526
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h16
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c272
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h22
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c12
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c39
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c93
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h59
-rw-r--r--fs/xfs/quota/xfs_dquot.c1
-rw-r--r--fs/xfs/xfs_acl.h2
-rw-r--r--fs/xfs/xfs_ag.h2
-rw-r--r--fs/xfs/xfs_alloc.c351
-rw-r--r--fs/xfs/xfs_attr_leaf.c4
-rw-r--r--fs/xfs/xfs_bmap.c85
-rw-r--r--fs/xfs/xfs_bmap.h5
-rw-r--r--fs/xfs/xfs_btree.c9
-rw-r--r--fs/xfs/xfs_buf_item.c32
-rw-r--r--fs/xfs/xfs_buf_item.h11
-rw-r--r--fs/xfs/xfs_dfrag.c13
-rw-r--r--fs/xfs/xfs_error.c3
-rw-r--r--fs/xfs/xfs_error.h5
-rw-r--r--fs/xfs/xfs_extfree_item.c97
-rw-r--r--fs/xfs/xfs_extfree_item.h11
-rw-r--r--fs/xfs/xfs_filestream.c8
-rw-r--r--fs/xfs/xfs_fsops.c1
-rw-r--r--fs/xfs/xfs_iget.c90
-rw-r--r--fs/xfs/xfs_inode.c54
-rw-r--r--fs/xfs/xfs_inode.h15
-rw-r--r--fs/xfs/xfs_inode_item.c121
-rw-r--r--fs/xfs/xfs_iomap.c233
-rw-r--r--fs/xfs/xfs_iomap.h27
-rw-r--r--fs/xfs/xfs_log.c739
-rw-r--r--fs/xfs/xfs_log_cil.c17
-rw-r--r--fs/xfs/xfs_log_priv.h127
-rw-r--r--fs/xfs/xfs_log_recover.c620
-rw-r--r--fs/xfs/xfs_mount.c24
-rw-r--r--fs/xfs/xfs_mount.h14
-rw-r--r--fs/xfs/xfs_mru_cache.c2
-rw-r--r--fs/xfs/xfs_quota.h20
-rw-r--r--fs/xfs/xfs_rename.c1
-rw-r--r--fs/xfs/xfs_trans.c79
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_ail.c232
-rw-r--r--fs/xfs/xfs_trans_extfree.c8
-rw-r--r--fs/xfs/xfs_trans_priv.h35
-rw-r--r--fs/xfs/xfs_vnodeops.c61
561 files changed, 25314 insertions, 23299 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 7952337..814ac4e 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -9,6 +9,8 @@ config 9P_FS
If unsure, say N.
+if 9P_FS
+
config 9P_FSCACHE
bool "Enable 9P client caching support (EXPERIMENTAL)"
depends on EXPERIMENTAL
@@ -17,3 +19,17 @@ config 9P_FSCACHE
Choose Y here to enable persistent, read-only local
caching support for 9p clients using FS-Cache
+
+config 9P_FS_POSIX_ACL
+ bool "9P POSIX Access Control Lists"
+ select FS_POSIX_ACL
+ help
+ POSIX Access Control Lists (ACLs) support permissions for users and
+ groups beyond the owner/group/world scheme.
+
+ To learn more about Access Control Lists, visit the POSIX ACLs for
+ Linux website <http://acl.bestbits.at/>.
+
+ If you don't know what Access Control Lists are, say N
+
+endif
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index 91fba02..ab8c127 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -3,6 +3,7 @@ obj-$(CONFIG_9P_FS) := 9p.o
9p-objs := \
vfs_super.o \
vfs_inode.o \
+ vfs_inode_dotl.o \
vfs_addr.o \
vfs_file.o \
vfs_dir.o \
@@ -13,3 +14,4 @@ obj-$(CONFIG_9P_FS) := 9p.o
xattr_user.o
9p-$(CONFIG_9P_FSCACHE) += cache.o
+9p-$(CONFIG_9P_FS_POSIX_ACL) += acl.o
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
new file mode 100644
index 0000000..02a2cf6
--- /dev/null
+++ b/fs/9p/acl.c
@@ -0,0 +1,395 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/posix_acl_xattr.h>
+#include "xattr.h"
+#include "acl.h"
+#include "v9fs_vfs.h"
+#include "v9fs.h"
+
+static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name)
+{
+ ssize_t size;
+ void *value = NULL;
+ struct posix_acl *acl = NULL;
+
+ size = v9fs_fid_xattr_get(fid, name, NULL, 0);
+ if (size > 0) {
+ value = kzalloc(size, GFP_NOFS);
+ if (!value)
+ return ERR_PTR(-ENOMEM);
+ size = v9fs_fid_xattr_get(fid, name, value, size);
+ if (size > 0) {
+ acl = posix_acl_from_xattr(value, size);
+ if (IS_ERR(acl))
+ goto err_out;
+ }
+ } else if (size == -ENODATA || size == 0 ||
+ size == -ENOSYS || size == -EOPNOTSUPP) {
+ acl = NULL;
+ } else
+ acl = ERR_PTR(-EIO);
+
+err_out:
+ kfree(value);
+ return acl;
+}
+
+int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
+{
+ int retval = 0;
+ struct posix_acl *pacl, *dacl;
+ struct v9fs_session_info *v9ses;
+
+ v9ses = v9fs_inode2v9ses(inode);
+ if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
+ set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL);
+ set_cached_acl(inode, ACL_TYPE_ACCESS, NULL);
+ return 0;
+ }
+ /* get the default/access acl values and cache them */
+ dacl = __v9fs_get_acl(fid, POSIX_ACL_XATTR_DEFAULT);
+ pacl = __v9fs_get_acl(fid, POSIX_ACL_XATTR_ACCESS);
+
+ if (!IS_ERR(dacl) && !IS_ERR(pacl)) {
+ set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl);
+ set_cached_acl(inode, ACL_TYPE_ACCESS, pacl);
+ posix_acl_release(dacl);
+ posix_acl_release(pacl);
+ } else
+ retval = -EIO;
+
+ return retval;
+}
+
+static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type)
+{
+ struct posix_acl *acl;
+ /*
+ * 9p Always cache the acl value when
+ * instantiating the inode (v9fs_inode_from_fid)
+ */
+ acl = get_cached_acl(inode, type);
+ BUG_ON(acl == ACL_NOT_CACHED);
+ return acl;
+}
+
+int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags)
+{
+ struct posix_acl *acl;
+ struct v9fs_session_info *v9ses;
+
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
+ v9ses = v9fs_inode2v9ses(inode);
+ if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
+ /*
+ * On access = client mode get the acl
+ * values from the server
+ */
+ return 0;
+ }
+ acl = v9fs_get_cached_acl(inode, ACL_TYPE_ACCESS);
+
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl) {
+ int error = posix_acl_permission(inode, acl, mask);
+ posix_acl_release(acl);
+ return error;
+ }
+ return -EAGAIN;
+}
+
+static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl)
+{
+ int retval;
+ char *name;
+ size_t size;
+ void *buffer;
+ struct inode *inode = dentry->d_inode;
+
+ set_cached_acl(inode, type, acl);
+ /* Set a setxattr request to server */
+ size = posix_acl_xattr_size(acl->a_count);
+ buffer = kmalloc(size, GFP_KERNEL);
+ if (!buffer)
+ return -ENOMEM;
+ retval = posix_acl_to_xattr(acl, buffer, size);
+ if (retval < 0)
+ goto err_free_out;
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ name = POSIX_ACL_XATTR_ACCESS;
+ break;
+ case ACL_TYPE_DEFAULT:
+ name = POSIX_ACL_XATTR_DEFAULT;
+ break;
+ default:
+ BUG();
+ }
+ retval = v9fs_xattr_set(dentry, name, buffer, size, 0);
+err_free_out:
+ kfree(buffer);
+ return retval;
+}
+
+int v9fs_acl_chmod(struct dentry *dentry)
+{
+ int retval = 0;
+ struct posix_acl *acl, *clone;
+ struct inode *inode = dentry->d_inode;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+ acl = v9fs_get_cached_acl(inode, ACL_TYPE_ACCESS);
+ if (acl) {
+ clone = posix_acl_clone(acl, GFP_KERNEL);
+ posix_acl_release(acl);
+ if (!clone)
+ return -ENOMEM;
+ retval = posix_acl_chmod_masq(clone, inode->i_mode);
+ if (!retval)
+ retval = v9fs_set_acl(dentry, ACL_TYPE_ACCESS, clone);
+ posix_acl_release(clone);
+ }
+ return retval;
+}
+
+int v9fs_set_create_acl(struct dentry *dentry,
+ struct posix_acl *dpacl, struct posix_acl *pacl)
+{
+ if (dpacl)
+ v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl);
+ if (pacl)
+ v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
+ posix_acl_release(dpacl);
+ posix_acl_release(pacl);
+ return 0;
+}
+
+int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+ struct posix_acl **dpacl, struct posix_acl **pacl)
+{
+ int retval = 0;
+ mode_t mode = *modep;
+ struct posix_acl *acl = NULL;
+
+ if (!S_ISLNK(mode)) {
+ acl = v9fs_get_cached_acl(dir, ACL_TYPE_DEFAULT);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (!acl)
+ mode &= ~current_umask();
+ }
+ if (acl) {
+ struct posix_acl *clone;
+
+ if (S_ISDIR(mode))
+ *dpacl = acl;
+ clone = posix_acl_clone(acl, GFP_NOFS);
+ retval = -ENOMEM;
+ if (!clone)
+ goto cleanup;
+
+ retval = posix_acl_create_masq(clone, &mode);
+ if (retval < 0) {
+ posix_acl_release(clone);
+ goto cleanup;
+ }
+ if (retval > 0)
+ *pacl = clone;
+ }
+ *modep = mode;
+ return 0;
+cleanup:
+ posix_acl_release(acl);
+ return retval;
+
+}
+
+static int v9fs_remote_get_acl(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
+{
+ char *full_name;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ full_name = POSIX_ACL_XATTR_ACCESS;
+ break;
+ case ACL_TYPE_DEFAULT:
+ full_name = POSIX_ACL_XATTR_DEFAULT;
+ break;
+ default:
+ BUG();
+ }
+ return v9fs_xattr_get(dentry, full_name, buffer, size);
+}
+
+static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
+{
+ struct v9fs_session_info *v9ses;
+ struct posix_acl *acl;
+ int error;
+
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+
+ v9ses = v9fs_inode2v9ses(dentry->d_inode);
+ /*
+ * We allow set/get/list of acl when access=client is not specified
+ */
+ if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
+ return v9fs_remote_get_acl(dentry, name, buffer, size, type);
+
+ acl = v9fs_get_cached_acl(dentry->d_inode, type);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl == NULL)
+ return -ENODATA;
+ error = posix_acl_to_xattr(acl, buffer, size);
+ posix_acl_release(acl);
+
+ return error;
+}
+
+static int v9fs_remote_set_acl(struct dentry *dentry, const char *name,
+ const void *value, size_t size,
+ int flags, int type)
+{
+ char *full_name;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ full_name = POSIX_ACL_XATTR_ACCESS;
+ break;
+ case ACL_TYPE_DEFAULT:
+ full_name = POSIX_ACL_XATTR_DEFAULT;
+ break;
+ default:
+ BUG();
+ }
+ return v9fs_xattr_set(dentry, full_name, value, size, flags);
+}
+
+
+static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
+ const void *value, size_t size,
+ int flags, int type)
+{
+ int retval;
+ struct posix_acl *acl;
+ struct v9fs_session_info *v9ses;
+ struct inode *inode = dentry->d_inode;
+
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+
+ v9ses = v9fs_inode2v9ses(dentry->d_inode);
+ /*
+ * set the attribute on the remote. Without even looking at the
+ * xattr value. We leave it to the server to validate
+ */
+ if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
+ return v9fs_remote_set_acl(dentry, name,
+ value, size, flags, type);
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+ if (!is_owner_or_cap(inode))
+ return -EPERM;
+ if (value) {
+ /* update the cached acl value */
+ acl = posix_acl_from_xattr(value, size);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ else if (acl) {
+ retval = posix_acl_valid(acl);
+ if (retval)
+ goto err_out;
+ }
+ } else
+ acl = NULL;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ name = POSIX_ACL_XATTR_ACCESS;
+ if (acl) {
+ mode_t mode = inode->i_mode;
+ retval = posix_acl_equiv_mode(acl, &mode);
+ if (retval < 0)
+ goto err_out;
+ else {
+ struct iattr iattr;
+ if (retval == 0) {
+ /*
+ * ACL can be represented
+ * by the mode bits. So don't
+ * update ACL.
+ */
+ acl = NULL;
+ value = NULL;
+ size = 0;
+ }
+ /* Updte the mode bits */
+ iattr.ia_mode = ((mode & S_IALLUGO) |
+ (inode->i_mode & ~S_IALLUGO));
+ iattr.ia_valid = ATTR_MODE;
+ /* FIXME should we update ctime ?
+ * What is the following setxattr update the
+ * mode ?
+ */
+ v9fs_vfs_setattr_dotl(dentry, &iattr);
+ }
+ }
+ break;
+ case ACL_TYPE_DEFAULT:
+ name = POSIX_ACL_XATTR_DEFAULT;
+ if (!S_ISDIR(inode->i_mode)) {
+ retval = acl ? -EINVAL : 0;
+ goto err_out;
+ }
+ break;
+ default:
+ BUG();
+ }
+ retval = v9fs_xattr_set(dentry, name, value, size, flags);
+ if (!retval)
+ set_cached_acl(inode, type, acl);
+err_out:
+ posix_acl_release(acl);
+ return retval;
+}
+
+const struct xattr_handler v9fs_xattr_acl_access_handler = {
+ .prefix = POSIX_ACL_XATTR_ACCESS,
+ .flags = ACL_TYPE_ACCESS,
+ .get = v9fs_xattr_get_acl,
+ .set = v9fs_xattr_set_acl,
+};
+
+const struct xattr_handler v9fs_xattr_acl_default_handler = {
+ .prefix = POSIX_ACL_XATTR_DEFAULT,
+ .flags = ACL_TYPE_DEFAULT,
+ .get = v9fs_xattr_get_acl,
+ .set = v9fs_xattr_set_acl,
+};
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
new file mode 100644
index 0000000..7ef3ac9
--- /dev/null
+++ b/fs/9p/acl.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+#ifndef FS_9P_ACL_H
+#define FS_9P_ACL_H
+
+#ifdef CONFIG_9P_FS_POSIX_ACL
+extern int v9fs_get_acl(struct inode *, struct p9_fid *);
+extern int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags);
+extern int v9fs_acl_chmod(struct dentry *);
+extern int v9fs_set_create_acl(struct dentry *,
+ struct posix_acl *, struct posix_acl *);
+extern int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+ struct posix_acl **dpacl, struct posix_acl **pacl);
+#else
+#define v9fs_check_acl NULL
+static inline int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
+{
+ return 0;
+}
+static inline int v9fs_acl_chmod(struct dentry *dentry)
+{
+ return 0;
+}
+static inline int v9fs_set_create_acl(struct dentry *dentry,
+ struct posix_acl *dpacl,
+ struct posix_acl *pacl)
+{
+ return 0;
+}
+static inline int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+ struct posix_acl **dpacl,
+ struct posix_acl **pacl)
+{
+ return 0;
+}
+
+#endif
+#endif /* FS_9P_XATTR_H */
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 6406f89..b00223c 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -149,6 +149,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
switch (access) {
case V9FS_ACCESS_SINGLE:
case V9FS_ACCESS_USER:
+ case V9FS_ACCESS_CLIENT:
uid = current_fsuid();
any = 0;
break;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 38dc0e0..2f77cd3 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -193,7 +193,17 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
v9ses->flags |= V9FS_ACCESS_USER;
else if (strcmp(s, "any") == 0)
v9ses->flags |= V9FS_ACCESS_ANY;
- else {
+ else if (strcmp(s, "client") == 0) {
+#ifdef CONFIG_9P_FS_POSIX_ACL
+ v9ses->flags |= V9FS_ACCESS_CLIENT;
+#else
+ P9_DPRINTK(P9_DEBUG_ERROR,
+ "access=client option not supported\n");
+ kfree(s);
+ ret = -EINVAL;
+ goto free_and_return;
+#endif
+ } else {
v9ses->flags |= V9FS_ACCESS_SINGLE;
v9ses->uid = simple_strtoul(s, &e, 10);
if (*e != '\0')
@@ -278,6 +288,16 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
+ if (!v9fs_proto_dotl(v9ses) &&
+ ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) {
+ /*
+ * We support ACCESS_CLIENT only for dotl.
+ * Fall back to ACCESS_USER
+ */
+ v9ses->flags &= ~V9FS_ACCESS_MASK;
+ v9ses->flags |= V9FS_ACCESS_USER;
+ }
+ /*FIXME !! */
/* for legacy mode, fall back to V9FS_ACCESS_ANY */
if (!(v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses)) &&
((v9ses->flags&V9FS_ACCESS_MASK) == V9FS_ACCESS_USER)) {
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 4c963c9..c4b5d88 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -33,13 +33,17 @@
*
* Session flags reflect options selected by users at mount time
*/
+#define V9FS_ACCESS_ANY (V9FS_ACCESS_SINGLE | \
+ V9FS_ACCESS_USER | \
+ V9FS_ACCESS_CLIENT)
+#define V9FS_ACCESS_MASK V9FS_ACCESS_ANY
+
enum p9_session_flags {
V9FS_PROTO_2000U = 0x01,
V9FS_PROTO_2000L = 0x02,
V9FS_ACCESS_SINGLE = 0x04,
V9FS_ACCESS_USER = 0x08,
- V9FS_ACCESS_ANY = 0x0C,
- V9FS_ACCESS_MASK = 0x0C,
+ V9FS_ACCESS_CLIENT = 0x10
};
/* possible values of ->cache */
@@ -109,11 +113,27 @@ struct v9fs_session_info {
struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
char *);
-void v9fs_session_close(struct v9fs_session_info *v9ses);
-void v9fs_session_cancel(struct v9fs_session_info *v9ses);
-void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses);
+extern void v9fs_session_close(struct v9fs_session_info *v9ses);
+extern void v9fs_session_cancel(struct v9fs_session_info *v9ses);
+extern void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses);
+extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
+ struct nameidata *nameidata);
+extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d);
+extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d);
+extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry);
+extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd,
+ void *p);
+extern struct inode *v9fs_inode(struct v9fs_session_info *v9ses,
+ struct p9_fid *fid,
+ struct super_block *sb);
-#define V9FS_MAGIC 0x01021997
+extern const struct inode_operations v9fs_dir_inode_operations_dotl;
+extern const struct inode_operations v9fs_file_inode_operations_dotl;
+extern const struct inode_operations v9fs_symlink_inode_operations_dotl;
+extern struct inode *v9fs_inode_dotl(struct v9fs_session_info *v9ses,
+ struct p9_fid *fid,
+ struct super_block *sb);
/* other default globals */
#define V9FS_PORT 564
@@ -136,3 +156,21 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
{
return v9ses->flags & V9FS_PROTO_2000L;
}
+
+/**
+ * v9fs_inode_from_fid - Helper routine to populate an inode by
+ * issuing a attribute request
+ * @v9ses: session information
+ * @fid: fid to issue attribute request for
+ * @sb: superblock on which to create inode
+ *
+ */
+static inline struct inode *
+v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
+{
+ if (v9fs_proto_dotl(v9ses))
+ return v9fs_inode_dotl(v9ses, fid, sb);
+ else
+ return v9fs_inode(v9ses, fid, sb);
+}
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 88418c4..bab0eac 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -64,3 +64,7 @@ int v9fs_uflags2omode(int uflags, int extended);
ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
void v9fs_blank_wstat(struct p9_wstat *wstat);
+int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
+int v9fs_file_fsync_dotl(struct file *filp, int datasync);
+
+#define P9_LOCK_TIMEOUT (30*HZ)
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 90e3844..b7f2a8e 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -154,10 +154,40 @@ static int v9fs_launder_page(struct page *page)
return 0;
}
+/**
+ * v9fs_direct_IO - 9P address space operation for direct I/O
+ * @rw: direction (read or write)
+ * @iocb: target I/O control block
+ * @iov: array of vectors that define I/O buffer
+ * @pos: offset in file to begin the operation
+ * @nr_segs: size of iovec array
+ *
+ * The presence of v9fs_direct_IO() in the address space ops vector
+ * allowes open() O_DIRECT flags which would have failed otherwise.
+ *
+ * In the non-cached mode, we shunt off direct read and write requests before
+ * the VFS gets them, so this method should never be called.
+ *
+ * Direct IO is not 'yet' supported in the cached mode. Hence when
+ * this routine is called through generic_file_aio_read(), the read/write fails
+ * with an error.
+ *
+ */
+ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+ loff_t pos, unsigned long nr_segs)
+{
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) "
+ "off/no(%lld/%lu) EINVAL\n",
+ iocb->ki_filp->f_path.dentry->d_name.name,
+ (long long) pos, nr_segs);
+
+ return -EINVAL;
+}
const struct address_space_operations v9fs_addr_operations = {
.readpage = v9fs_vfs_readpage,
.readpages = v9fs_vfs_readpages,
.releasepage = v9fs_release_page,
.invalidatepage = v9fs_invalidate_page,
.launder_page = v9fs_launder_page,
+ .direct_IO = v9fs_direct_IO,
};
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index cbf4e50..466d2a4 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -51,7 +51,7 @@
*
*/
-static int v9fs_dentry_delete(struct dentry *dentry)
+static int v9fs_dentry_delete(const struct dentry *dentry)
{
P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
dentry);
@@ -68,7 +68,7 @@ static int v9fs_dentry_delete(struct dentry *dentry)
*
*/
-static int v9fs_cached_dentry_delete(struct dentry *dentry)
+static int v9fs_cached_dentry_delete(const struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 899f168..b84ebe8 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -242,7 +242,8 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
while (rdir->head < rdir->tail) {
err = p9dirent_read(rdir->buf + rdir->head,
- buflen - rdir->head, &curdirent,
+ rdir->tail - rdir->head,
+ &curdirent,
fid->clnt->proto_version);
if (err < 0) {
P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
@@ -314,4 +315,5 @@ const struct file_operations v9fs_dir_operations_dotl = {
.readdir = v9fs_dir_readdir_dotl,
.open = v9fs_file_open,
.release = v9fs_dir_release,
+ .fsync = v9fs_file_fsync_dotl,
};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index e97c92b..240c306 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -33,6 +33,7 @@
#include <linux/inet.h>
#include <linux/list.h>
#include <linux/pagemap.h>
+#include <linux/utsname.h>
#include <asm/uaccess.h>
#include <linux/idr.h>
#include <net/9p/9p.h>
@@ -44,6 +45,7 @@
#include "cache.h"
static const struct file_operations v9fs_cached_file_operations;
+static const struct file_operations v9fs_cached_file_operations_dotl;
/**
* v9fs_file_open - open a file (or directory)
@@ -92,6 +94,8 @@ int v9fs_file_open(struct inode *inode, struct file *file)
/* enable cached file options */
if(file->f_op == &v9fs_file_operations)
file->f_op = &v9fs_cached_file_operations;
+ else if (file->f_op == &v9fs_file_operations_dotl)
+ file->f_op = &v9fs_cached_file_operations_dotl;
#ifdef CONFIG_9P_FSCACHE
v9fs_cache_inode_set_cookie(inode, file);
@@ -130,6 +134,206 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
return res;
}
+static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
+{
+ struct p9_flock flock;
+ struct p9_fid *fid;
+ uint8_t status;
+ int res = 0;
+ unsigned char fl_type;
+
+ fid = filp->private_data;
+ BUG_ON(fid == NULL);
+
+ if ((fl->fl_flags & FL_POSIX) != FL_POSIX)
+ BUG();
+
+ res = posix_lock_file_wait(filp, fl);
+ if (res < 0)
+ goto out;
+
+ /* convert posix lock to p9 tlock args */
+ memset(&flock, 0, sizeof(flock));
+ flock.type = fl->fl_type;
+ flock.start = fl->fl_start;
+ if (fl->fl_end == OFFSET_MAX)
+ flock.length = 0;
+ else
+ flock.length = fl->fl_end - fl->fl_start + 1;
+ flock.proc_id = fl->fl_pid;
+ flock.client_id = utsname()->nodename;
+ if (IS_SETLKW(cmd))
+ flock.flags = P9_LOCK_FLAGS_BLOCK;
+
+ /*
+ * if its a blocked request and we get P9_LOCK_BLOCKED as the status
+ * for lock request, keep on trying
+ */
+ for (;;) {
+ res = p9_client_lock_dotl(fid, &flock, &status);
+ if (res < 0)
+ break;
+
+ if (status != P9_LOCK_BLOCKED)
+ break;
+ if (status == P9_LOCK_BLOCKED && !IS_SETLKW(cmd))
+ break;
+ schedule_timeout_interruptible(P9_LOCK_TIMEOUT);
+ }
+
+ /* map 9p status to VFS status */
+ switch (status) {
+ case P9_LOCK_SUCCESS:
+ res = 0;
+ break;
+ case P9_LOCK_BLOCKED:
+ res = -EAGAIN;
+ break;
+ case P9_LOCK_ERROR:
+ case P9_LOCK_GRACE:
+ res = -ENOLCK;
+ break;
+ default:
+ BUG();
+ }
+
+ /*
+ * incase server returned error for lock request, revert
+ * it locally
+ */
+ if (res < 0 && fl->fl_type != F_UNLCK) {
+ fl_type = fl->fl_type;
+ fl->fl_type = F_UNLCK;
+ res = posix_lock_file_wait(filp, fl);
+ fl->fl_type = fl_type;
+ }
+out:
+ return res;
+}
+
+static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
+{
+ struct p9_getlock glock;
+ struct p9_fid *fid;
+ int res = 0;
+
+ fid = filp->private_data;
+ BUG_ON(fid == NULL);
+
+ posix_test_lock(filp, fl);
+ /*
+ * if we have a conflicting lock locally, no need to validate
+ * with server
+ */
+ if (fl->fl_type != F_UNLCK)
+ return res;
+
+ /* convert posix lock to p9 tgetlock args */
+ memset(&glock, 0, sizeof(glock));
+ glock.type = fl->fl_type;
+ glock.start = fl->fl_start;
+ if (fl->fl_end == OFFSET_MAX)
+ glock.length = 0;
+ else
+ glock.length = fl->fl_end - fl->fl_start + 1;
+ glock.proc_id = fl->fl_pid;
+ glock.client_id = utsname()->nodename;
+
+ res = p9_client_getlock_dotl(fid, &glock);
+ if (res < 0)
+ return res;
+ if (glock.type != F_UNLCK) {
+ fl->fl_type = glock.type;
+ fl->fl_start = glock.start;
+ if (glock.length == 0)
+ fl->fl_end = OFFSET_MAX;
+ else
+ fl->fl_end = glock.start + glock.length - 1;
+ fl->fl_pid = glock.proc_id;
+ } else
+ fl->fl_type = F_UNLCK;
+
+ return res;
+}
+
+/**
+ * v9fs_file_lock_dotl - lock a file (or directory)
+ * @filp: file to be locked
+ * @cmd: lock command
+ * @fl: file lock structure
+ *
+ */
+
+static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl)
+{
+ struct inode *inode = filp->f_path.dentry->d_inode;
+ int ret = -ENOLCK;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp,
+ cmd, fl, filp->f_path.dentry->d_name.name);
+
+ /* No mandatory locks */
+ if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
+ goto out_err;
+
+ if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
+ filemap_write_and_wait(inode->i_mapping);
+ invalidate_mapping_pages(&inode->i_data, 0, -1);
+ }
+
+ if (IS_SETLK(cmd) || IS_SETLKW(cmd))
+ ret = v9fs_file_do_lock(filp, cmd, fl);
+ else if (IS_GETLK(cmd))
+ ret = v9fs_file_getlock(filp, fl);
+ else
+ ret = -EINVAL;
+out_err:
+ return ret;
+}
+
+/**
+ * v9fs_file_flock_dotl - lock a file
+ * @filp: file to be locked
+ * @cmd: lock command
+ * @fl: file lock structure
+ *
+ */
+
+static int v9fs_file_flock_dotl(struct file *filp, int cmd,
+ struct file_lock *fl)
+{
+ struct inode *inode = filp->f_path.dentry->d_inode;
+ int ret = -ENOLCK;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp,
+ cmd, fl, filp->f_path.dentry->d_name.name);
+
+ /* No mandatory locks */
+ if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
+ goto out_err;
+
+ if (!(fl->fl_flags & FL_FLOCK))
+ goto out_err;
+
+ if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
+ filemap_write_and_wait(inode->i_mapping);
+ invalidate_mapping_pages(&inode->i_data, 0, -1);
+ }
+ /* Convert flock to posix lock */
+ fl->fl_owner = (fl_owner_t)filp;
+ fl->fl_start = 0;
+ fl->fl_end = OFFSET_MAX;
+ fl->fl_flags |= FL_POSIX;
+ fl->fl_flags ^= FL_FLOCK;
+
+ if (IS_SETLK(cmd) | IS_SETLKW(cmd))
+ ret = v9fs_file_do_lock(filp, cmd, fl);
+ else
+ ret = -EINVAL;
+out_err:
+ return ret;
+}
+
/**
* v9fs_file_readn - read from a file
* @filp: file pointer to read
@@ -219,7 +423,9 @@ static ssize_t
v9fs_file_write(struct file *filp, const char __user * data,
size_t count, loff_t * offset)
{
- int n, rsize, total = 0;
+ ssize_t retval;
+ size_t total = 0;
+ int n;
struct p9_fid *fid;
struct p9_client *clnt;
struct inode *inode = filp->f_path.dentry->d_inode;
@@ -232,14 +438,19 @@ v9fs_file_write(struct file *filp, const char __user * data,
fid = filp->private_data;
clnt = fid->clnt;
- rsize = fid->iounit ? fid->iounit : clnt->msize - P9_IOHDRSZ;
+ retval = generic_write_checks(filp, &origin, &count, 0);
+ if (retval)
+ goto out;
- do {
- if (count < rsize)
- rsize = count;
+ retval = -EINVAL;
+ if ((ssize_t) count < 0)
+ goto out;
+ retval = 0;
+ if (!count)
+ goto out;
- n = p9_client_write(fid, NULL, data+total, origin+total,
- rsize);
+ do {
+ n = p9_client_write(fid, NULL, data+total, origin+total, count);
if (n <= 0)
break;
count -= n;
@@ -258,9 +469,11 @@ v9fs_file_write(struct file *filp, const char __user * data,
}
if (n < 0)
- return n;
-
- return total;
+ retval = n;
+ else
+ retval = total;
+out:
+ return retval;
}
static int v9fs_file_fsync(struct file *filp, int datasync)
@@ -278,6 +491,20 @@ static int v9fs_file_fsync(struct file *filp, int datasync)
return retval;
}
+int v9fs_file_fsync_dotl(struct file *filp, int datasync)
+{
+ struct p9_fid *fid;
+ int retval;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n",
+ filp, datasync);
+
+ fid = filp->private_data;
+
+ retval = p9_client_fsync(fid, datasync);
+ return retval;
+}
+
static const struct file_operations v9fs_cached_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
@@ -290,6 +517,19 @@ static const struct file_operations v9fs_cached_file_operations = {
.fsync = v9fs_file_fsync,
};
+static const struct file_operations v9fs_cached_file_operations_dotl = {
+ .llseek = generic_file_llseek,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = v9fs_file_write,
+ .open = v9fs_file_open,
+ .release = v9fs_dir_release,
+ .lock = v9fs_file_lock_dotl,
+ .flock = v9fs_file_flock_dotl,
+ .mmap = generic_file_readonly_mmap,
+ .fsync = v9fs_file_fsync_dotl,
+};
+
const struct file_operations v9fs_file_operations = {
.llseek = generic_file_llseek,
.read = v9fs_file_read,
@@ -307,7 +547,8 @@ const struct file_operations v9fs_file_operations_dotl = {
.write = v9fs_file_write,
.open = v9fs_file_open,
.release = v9fs_dir_release,
- .lock = v9fs_file_lock,
+ .lock = v9fs_file_lock_dotl,
+ .flock = v9fs_file_flock_dotl,
.mmap = generic_file_readonly_mmap,
- .fsync = v9fs_file_fsync,
+ .fsync = v9fs_file_fsync_dotl,
};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index ef5905f..5076eeb 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -36,6 +36,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/xattr.h>
+#include <linux/posix_acl.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
@@ -44,14 +45,12 @@
#include "fid.h"
#include "cache.h"
#include "xattr.h"
+#include "acl.h"
static const struct inode_operations v9fs_dir_inode_operations;
static const struct inode_operations v9fs_dir_inode_operations_dotu;
-static const struct inode_operations v9fs_dir_inode_operations_dotl;
static const struct inode_operations v9fs_file_inode_operations;
-static const struct inode_operations v9fs_file_inode_operations_dotl;
static const struct inode_operations v9fs_symlink_inode_operations;
-static const struct inode_operations v9fs_symlink_inode_operations_dotl;
/**
* unixmode2p9mode - convert unix mode bits to plan 9
@@ -231,46 +230,18 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
*
*/
-void v9fs_destroy_inode(struct inode *inode)
+static void v9fs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode));
}
-#endif
-
-/**
- * v9fs_get_fsgid_for_create - Helper function to get the gid for creating a
- * new file system object. This checks the S_ISGID to determine the owning
- * group of the new file system object.
- */
-
-static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
-{
- BUG_ON(dir_inode == NULL);
-
- if (dir_inode->i_mode & S_ISGID) {
- /* set_gid bit is set.*/
- return dir_inode->i_gid;
- }
- return current_fsgid();
-}
-
-/**
- * v9fs_dentry_from_dir_inode - helper function to get the dentry from
- * dir inode.
- *
- */
-static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
+void v9fs_destroy_inode(struct inode *inode)
{
- struct dentry *dentry;
-
- spin_lock(&dcache_lock);
- /* Directory should have only one entry. */
- BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
- dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
- spin_unlock(&dcache_lock);
- return dentry;
+ call_rcu(&inode->i_rcu, v9fs_i_callback);
}
+#endif
/**
* v9fs_get_inode - helper function to setup an inode
@@ -441,7 +412,7 @@ void v9fs_evict_inode(struct inode *inode)
#endif
}
-static struct inode *
+struct inode *
v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid,
struct super_block *sb)
{
@@ -476,55 +447,6 @@ error:
return ERR_PTR(err);
}
-static struct inode *
-v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
-{
- struct inode *ret = NULL;
- int err;
- struct p9_stat_dotl *st;
-
- st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
- if (IS_ERR(st))
- return ERR_CAST(st);
-
- ret = v9fs_get_inode(sb, st->st_mode);
- if (IS_ERR(ret)) {
- err = PTR_ERR(ret);
- goto error;
- }
-
- v9fs_stat2inode_dotl(st, ret);
- ret->i_ino = v9fs_qid2ino(&st->qid);
-#ifdef CONFIG_9P_FSCACHE
- v9fs_vcookie_set_qid(ret, &st->qid);
- v9fs_cache_inode_get_cookie(ret);
-#endif
- kfree(st);
- return ret;
-error:
- kfree(st);
- return ERR_PTR(err);
-}
-
-/**
- * v9fs_inode_from_fid - Helper routine to populate an inode by
- * issuing a attribute request
- * @v9ses: session information
- * @fid: fid to issue attribute request for
- * @sb: superblock on which to create inode
- *
- */
-static inline struct inode *
-v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
-{
- if (v9fs_proto_dotl(v9ses))
- return v9fs_inode_dotl(v9ses, fid, sb);
- else
- return v9fs_inode(v9ses, fid, sb);
-}
-
/**
* v9fs_remove - helper function to remove files and directories
* @dir: directory inode that is being deleted
@@ -553,13 +475,6 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
return retval;
}
-static int
-v9fs_open_created(struct inode *inode, struct file *file)
-{
- return 0;
-}
-
-
/**
* v9fs_create - Create a file
* @v9ses: session information
@@ -622,12 +537,6 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
goto error;
}
-
- if (v9ses->cache)
- dentry->d_op = &v9fs_cached_dentry_operations;
- else
- dentry->d_op = &v9fs_dentry_operations;
-
d_instantiate(dentry, inode);
err = v9fs_fid_add(dentry, fid);
if (err < 0)
@@ -646,121 +555,6 @@ error:
}
/**
- * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
- * @dir: directory inode that is being created
- * @dentry: dentry that is being deleted
- * @mode: create permissions
- * @nd: path information
- *
- */
-
-static int
-v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
- struct nameidata *nd)
-{
- int err = 0;
- char *name = NULL;
- gid_t gid;
- int flags;
- struct v9fs_session_info *v9ses;
- struct p9_fid *fid = NULL;
- struct p9_fid *dfid, *ofid;
- struct file *filp;
- struct p9_qid qid;
- struct inode *inode;
-
- v9ses = v9fs_inode2v9ses(dir);
- if (nd && nd->flags & LOOKUP_OPEN)
- flags = nd->intent.open.flags - 1;
- else
- flags = O_RDWR;
-
- name = (char *) dentry->d_name.name;
- P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
- "mode:0x%x\n", name, flags, mode);
-
- dfid = v9fs_fid_lookup(dentry->d_parent);
- if (IS_ERR(dfid)) {
- err = PTR_ERR(dfid);
- P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
- return err;
- }
-
- /* clone a fid to use for creation */
- ofid = p9_client_walk(dfid, 0, NULL, 1);
- if (IS_ERR(ofid)) {
- err = PTR_ERR(ofid);
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
- return err;
- }
-
- gid = v9fs_get_fsgid_for_create(dir);
- err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
- if (err < 0) {
- P9_DPRINTK(P9_DEBUG_VFS,
- "p9_client_open_dotl failed in creat %d\n",
- err);
- goto error;
- }
-
- /* No need to populate the inode if we are not opening the file AND
- * not in cached mode.
- */
- if (!v9ses->cache && !(nd && nd->flags & LOOKUP_OPEN)) {
- /* Not in cached mode. No need to populate inode with stat */
- dentry->d_op = &v9fs_dentry_operations;
- p9_client_clunk(ofid);
- d_instantiate(dentry, NULL);
- return 0;
- }
-
- /* Now walk from the parent so we can get an unopened fid. */
- fid = p9_client_walk(dfid, 1, &name, 1);
- if (IS_ERR(fid)) {
- err = PTR_ERR(fid);
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
- fid = NULL;
- goto error;
- }
-
- /* instantiate inode and assign the unopened fid to dentry */
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
- goto error;
- }
- if (v9ses->cache)
- dentry->d_op = &v9fs_cached_dentry_operations;
- else
- dentry->d_op = &v9fs_dentry_operations;
- d_instantiate(dentry, inode);
- err = v9fs_fid_add(dentry, fid);
- if (err < 0)
- goto error;
-
- /* if we are opening a file, assign the open fid to the file */
- if (nd && nd->flags & LOOKUP_OPEN) {
- filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created);
- if (IS_ERR(filp)) {
- p9_client_clunk(ofid);
- return PTR_ERR(filp);
- }
- filp->private_data = ofid;
- } else
- p9_client_clunk(ofid);
-
- return 0;
-
-error:
- if (ofid)
- p9_client_clunk(ofid);
- if (fid)
- p9_client_clunk(fid);
- return err;
-}
-
-/**
* v9fs_vfs_create - VFS hook to create files
* @dir: directory inode that is being created
* @dentry: dentry that is being deleted
@@ -800,7 +594,7 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
/* if we are opening a file, assign the open fid to the file */
if (nd && nd->flags & LOOKUP_OPEN) {
- filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created);
+ filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
if (IS_ERR(filp)) {
err = PTR_ERR(filp);
goto error;
@@ -850,83 +644,6 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
return err;
}
-
-/**
- * v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory
- * @dir: inode that is being unlinked
- * @dentry: dentry that is being unlinked
- * @mode: mode for new directory
- *
- */
-
-static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry,
- int mode)
-{
- int err;
- struct v9fs_session_info *v9ses;
- struct p9_fid *fid = NULL, *dfid = NULL;
- gid_t gid;
- char *name;
- struct inode *inode;
- struct p9_qid qid;
- struct dentry *dir_dentry;
-
- P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
- err = 0;
- v9ses = v9fs_inode2v9ses(dir);
-
- mode |= S_IFDIR;
- dir_dentry = v9fs_dentry_from_dir_inode(dir);
- dfid = v9fs_fid_lookup(dir_dentry);
- if (IS_ERR(dfid)) {
- err = PTR_ERR(dfid);
- P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
- dfid = NULL;
- goto error;
- }
-
- gid = v9fs_get_fsgid_for_create(dir);
- if (gid < 0) {
- P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n");
- goto error;
- }
-
- name = (char *) dentry->d_name.name;
- err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
- if (err < 0)
- goto error;
-
- /* instantiate inode and assign the unopened fid to the dentry */
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- fid = p9_client_walk(dfid, 1, &name, 1);
- if (IS_ERR(fid)) {
- err = PTR_ERR(fid);
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
- err);
- fid = NULL;
- goto error;
- }
-
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
- err);
- goto error;
- }
- dentry->d_op = &v9fs_cached_dentry_operations;
- d_instantiate(dentry, inode);
- err = v9fs_fid_add(dentry, fid);
- if (err < 0)
- goto error;
- fid = NULL;
- }
-error:
- if (fid)
- p9_client_clunk(fid);
- return err;
-}
-
/**
* v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode
* @dir: inode that is being walked from
@@ -935,7 +652,7 @@ error:
*
*/
-static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
+struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nameidata)
{
struct super_block *sb;
@@ -979,17 +696,19 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
result = v9fs_fid_add(dentry, fid);
if (result < 0)
- goto error;
+ goto error_iput;
inst_out:
if (v9ses->cache)
- dentry->d_op = &v9fs_cached_dentry_operations;
+ d_set_d_op(dentry, &v9fs_cached_dentry_operations);
else
- dentry->d_op = &v9fs_dentry_operations;
+ d_set_d_op(dentry, &v9fs_dentry_operations);
d_add(dentry, inode);
return NULL;
+error_iput:
+ iput(inode);
error:
p9_client_clunk(fid);
@@ -1003,7 +722,7 @@ error:
*
*/
-static int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
+int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
{
return v9fs_remove(i, d, 0);
}
@@ -1015,7 +734,7 @@ static int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
*
*/
-static int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
+int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
{
return v9fs_remove(i, d, 1);
}
@@ -1029,7 +748,7 @@ static int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
*
*/
-static int
+int
v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
@@ -1136,42 +855,6 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
return 0;
}
-static int
-v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
-{
- int err;
- struct v9fs_session_info *v9ses;
- struct p9_fid *fid;
- struct p9_stat_dotl *st;
-
- P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
- err = -EPERM;
- v9ses = v9fs_inode2v9ses(dentry->d_inode);
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
- return simple_getattr(mnt, dentry, stat);
-
- fid = v9fs_fid_lookup(dentry);
- if (IS_ERR(fid))
- return PTR_ERR(fid);
-
- /* Ask for all the fields in stat structure. Server will return
- * whatever it supports
- */
-
- st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
- if (IS_ERR(st))
- return PTR_ERR(st);
-
- v9fs_stat2inode_dotl(st, dentry->d_inode);
- generic_fillattr(dentry->d_inode, stat);
- /* Change block size to what the server returned */
- stat->blksize = st->st_blksize;
-
- kfree(st);
- return 0;
-}
-
/**
* v9fs_vfs_setattr - set file metadata
* @dentry: file whose metadata to set
@@ -1231,58 +914,6 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
}
/**
- * v9fs_vfs_setattr_dotl - set file metadata
- * @dentry: file whose metadata to set
- * @iattr: metadata assignment structure
- *
- */
-
-static int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
-{
- int retval;
- struct v9fs_session_info *v9ses;
- struct p9_fid *fid;
- struct p9_iattr_dotl p9attr;
-
- P9_DPRINTK(P9_DEBUG_VFS, "\n");
-
- retval = inode_change_ok(dentry->d_inode, iattr);
- if (retval)
- return retval;
-
- p9attr.valid = iattr->ia_valid;
- p9attr.mode = iattr->ia_mode;
- p9attr.uid = iattr->ia_uid;
- p9attr.gid = iattr->ia_gid;
- p9attr.size = iattr->ia_size;
- p9attr.atime_sec = iattr->ia_atime.tv_sec;
- p9attr.atime_nsec = iattr->ia_atime.tv_nsec;
- p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
- p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
-
- retval = -EPERM;
- v9ses = v9fs_inode2v9ses(dentry->d_inode);
- fid = v9fs_fid_lookup(dentry);
- if (IS_ERR(fid))
- return PTR_ERR(fid);
-
- retval = p9_client_setattr(fid, &p9attr);
- if (retval < 0)
- return retval;
-
- if ((iattr->ia_valid & ATTR_SIZE) &&
- iattr->ia_size != i_size_read(dentry->d_inode)) {
- retval = vmtruncate(dentry->d_inode, iattr->ia_size);
- if (retval)
- return retval;
- }
-
- setattr_copy(dentry->d_inode, iattr);
- mark_inode_dirty(dentry->d_inode);
- return 0;
-}
-
-/**
* v9fs_stat2inode - populate an inode structure with mistat info
* @stat: Plan 9 metadata (mistat) structure
* @inode: inode to populate
@@ -1360,77 +991,6 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
}
/**
- * v9fs_stat2inode_dotl - populate an inode structure with stat info
- * @stat: stat structure
- * @inode: inode to populate
- * @sb: superblock of filesystem
- *
- */
-
-void
-v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
-{
-
- if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
- inode->i_atime.tv_sec = stat->st_atime_sec;
- inode->i_atime.tv_nsec = stat->st_atime_nsec;
- inode->i_mtime.tv_sec = stat->st_mtime_sec;
- inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
- inode->i_ctime.tv_sec = stat->st_ctime_sec;
- inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
- inode->i_uid = stat->st_uid;
- inode->i_gid = stat->st_gid;
- inode->i_nlink = stat->st_nlink;
- inode->i_mode = stat->st_mode;
- inode->i_rdev = new_decode_dev(stat->st_rdev);
-
- if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode)))
- init_special_inode(inode, inode->i_mode, inode->i_rdev);
-
- i_size_write(inode, stat->st_size);
- inode->i_blocks = stat->st_blocks;
- } else {
- if (stat->st_result_mask & P9_STATS_ATIME) {
- inode->i_atime.tv_sec = stat->st_atime_sec;
- inode->i_atime.tv_nsec = stat->st_atime_nsec;
- }
- if (stat->st_result_mask & P9_STATS_MTIME) {
- inode->i_mtime.tv_sec = stat->st_mtime_sec;
- inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
- }
- if (stat->st_result_mask & P9_STATS_CTIME) {
- inode->i_ctime.tv_sec = stat->st_ctime_sec;
- inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
- }
- if (stat->st_result_mask & P9_STATS_UID)
- inode->i_uid = stat->st_uid;
- if (stat->st_result_mask & P9_STATS_GID)
- inode->i_gid = stat->st_gid;
- if (stat->st_result_mask & P9_STATS_NLINK)
- inode->i_nlink = stat->st_nlink;
- if (stat->st_result_mask & P9_STATS_MODE) {
- inode->i_mode = stat->st_mode;
- if ((S_ISBLK(inode->i_mode)) ||
- (S_ISCHR(inode->i_mode)))
- init_special_inode(inode, inode->i_mode,
- inode->i_rdev);
- }
- if (stat->st_result_mask & P9_STATS_RDEV)
- inode->i_rdev = new_decode_dev(stat->st_rdev);
- if (stat->st_result_mask & P9_STATS_SIZE)
- i_size_write(inode, stat->st_size);
- if (stat->st_result_mask & P9_STATS_BLOCKS)
- inode->i_blocks = stat->st_blocks;
- }
- if (stat->st_result_mask & P9_STATS_GEN)
- inode->i_generation = stat->st_gen;
-
- /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
- * because the inode structure does not have fields for them.
- */
-}
-
-/**
* v9fs_qid2ino - convert qid into inode number
* @qid: qid to hash
*
@@ -1473,7 +1033,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
if (IS_ERR(fid))
return PTR_ERR(fid);
- if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses))
+ if (!v9fs_proto_dotu(v9ses))
return -EBADF;
st = p9_client_stat(fid);
@@ -1536,7 +1096,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
*
*/
-static void
+void
v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
{
char *s = nd_get_link(nd);
@@ -1580,99 +1140,6 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
}
/**
- * v9fs_vfs_symlink_dotl - helper function to create symlinks
- * @dir: directory inode containing symlink
- * @dentry: dentry for symlink
- * @symname: symlink data
- *
- * See Also: 9P2000.L RFC for more information
- *
- */
-
-static int
-v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
- const char *symname)
-{
- struct v9fs_session_info *v9ses;
- struct p9_fid *dfid;
- struct p9_fid *fid = NULL;
- struct inode *inode;
- struct p9_qid qid;
- char *name;
- int err;
- gid_t gid;
-
- name = (char *) dentry->d_name.name;
- P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
- dir->i_ino, name, symname);
- v9ses = v9fs_inode2v9ses(dir);
-
- dfid = v9fs_fid_lookup(dentry->d_parent);
- if (IS_ERR(dfid)) {
- err = PTR_ERR(dfid);
- P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
- return err;
- }
-
- gid = v9fs_get_fsgid_for_create(dir);
-
- if (gid < 0) {
- P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_egid failed %d\n", gid);
- goto error;
- }
-
- /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */
- err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);
-
- if (err < 0) {
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err);
- goto error;
- }
-
- if (v9ses->cache) {
- /* Now walk from the parent so we can get an unopened fid. */
- fid = p9_client_walk(dfid, 1, &name, 1);
- if (IS_ERR(fid)) {
- err = PTR_ERR(fid);
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
- err);
- fid = NULL;
- goto error;
- }
-
- /* instantiate inode and assign the unopened fid to dentry */
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
- err);
- goto error;
- }
- dentry->d_op = &v9fs_cached_dentry_operations;
- d_instantiate(dentry, inode);
- err = v9fs_fid_add(dentry, fid);
- if (err < 0)
- goto error;
- fid = NULL;
- } else {
- /* Not in cached mode. No need to populate inode with stat */
- inode = v9fs_get_inode(dir->i_sb, S_IFLNK);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- goto error;
- }
- dentry->d_op = &v9fs_dentry_operations;
- d_instantiate(dentry, inode);
- }
-
-error:
- if (fid)
- p9_client_clunk(fid);
-
- return err;
-}
-
-/**
* v9fs_vfs_symlink - helper function to create symlinks
* @dir: directory inode containing symlink
* @dentry: dentry for symlink
@@ -1731,77 +1198,6 @@ clunk_fid:
}
/**
- * v9fs_vfs_link_dotl - create a hardlink for dotl
- * @old_dentry: dentry for file to link to
- * @dir: inode destination for new link
- * @dentry: dentry for link
- *
- */
-
-static int
-v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
- struct dentry *dentry)
-{
- int err;
- struct p9_fid *dfid, *oldfid;
- char *name;
- struct v9fs_session_info *v9ses;
- struct dentry *dir_dentry;
-
- P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
- dir->i_ino, old_dentry->d_name.name,
- dentry->d_name.name);
-
- v9ses = v9fs_inode2v9ses(dir);
- dir_dentry = v9fs_dentry_from_dir_inode(dir);
- dfid = v9fs_fid_lookup(dir_dentry);
- if (IS_ERR(dfid))
- return PTR_ERR(dfid);
-
- oldfid = v9fs_fid_lookup(old_dentry);
- if (IS_ERR(oldfid))
- return PTR_ERR(oldfid);
-
- name = (char *) dentry->d_name.name;
-
- err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name);
-
- if (err < 0) {
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_link failed %d\n", err);
- return err;
- }
-
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- /* Get the latest stat info from server. */
- struct p9_fid *fid;
- struct p9_stat_dotl *st;
-
- fid = v9fs_fid_lookup(old_dentry);
- if (IS_ERR(fid))
- return PTR_ERR(fid);
-
- st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
- if (IS_ERR(st))
- return PTR_ERR(st);
-
- v9fs_stat2inode_dotl(st, old_dentry->d_inode);
-
- kfree(st);
- } else {
- /* Caching disabled. No need to get upto date stat info.
- * This dentry will be released immediately. So, just hold the
- * inode
- */
- ihold(old_dentry->d_inode);
- }
-
- dentry->d_op = old_dentry->d_op;
- d_instantiate(dentry, old_dentry->d_inode);
-
- return err;
-}
-
-/**
* v9fs_vfs_mknod - create a special file
* @dir: inode destination for new link
* @dentry: dentry for file
@@ -1846,100 +1242,6 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
return retval;
}
-/**
- * v9fs_vfs_mknod_dotl - create a special file
- * @dir: inode destination for new link
- * @dentry: dentry for file
- * @mode: mode for creation
- * @rdev: device associated with special file
- *
- */
-static int
-v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode,
- dev_t rdev)
-{
- int err;
- char *name;
- struct v9fs_session_info *v9ses;
- struct p9_fid *fid = NULL, *dfid = NULL;
- struct inode *inode;
- gid_t gid;
- struct p9_qid qid;
- struct dentry *dir_dentry;
-
- P9_DPRINTK(P9_DEBUG_VFS,
- " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
- dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
-
- if (!new_valid_dev(rdev))
- return -EINVAL;
-
- v9ses = v9fs_inode2v9ses(dir);
- dir_dentry = v9fs_dentry_from_dir_inode(dir);
- dfid = v9fs_fid_lookup(dir_dentry);
- if (IS_ERR(dfid)) {
- err = PTR_ERR(dfid);
- P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
- dfid = NULL;
- goto error;
- }
-
- gid = v9fs_get_fsgid_for_create(dir);
- if (gid < 0) {
- P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n");
- goto error;
- }
-
- name = (char *) dentry->d_name.name;
-
- err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid);
- if (err < 0)
- goto error;
-
- /* instantiate inode and assign the unopened fid to the dentry */
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- fid = p9_client_walk(dfid, 1, &name, 1);
- if (IS_ERR(fid)) {
- err = PTR_ERR(fid);
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
- err);
- fid = NULL;
- goto error;
- }
-
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
- err);
- goto error;
- }
- dentry->d_op = &v9fs_cached_dentry_operations;
- d_instantiate(dentry, inode);
- err = v9fs_fid_add(dentry, fid);
- if (err < 0)
- goto error;
- fid = NULL;
- } else {
- /*
- * Not in cached mode. No need to populate inode with stat.
- * socket syscall returns a fd, so we need instantiate
- */
- inode = v9fs_get_inode(dir->i_sb, mode);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- goto error;
- }
- dentry->d_op = &v9fs_dentry_operations;
- d_instantiate(dentry, inode);
- }
-
-error:
- if (fid)
- p9_client_clunk(fid);
- return err;
-}
-
static const struct inode_operations v9fs_dir_inode_operations_dotu = {
.create = v9fs_vfs_create,
.lookup = v9fs_vfs_lookup,
@@ -1954,25 +1256,6 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
.setattr = v9fs_vfs_setattr,
};
-static const struct inode_operations v9fs_dir_inode_operations_dotl = {
- .create = v9fs_vfs_create_dotl,
- .lookup = v9fs_vfs_lookup,
- .link = v9fs_vfs_link_dotl,
- .symlink = v9fs_vfs_symlink_dotl,
- .unlink = v9fs_vfs_unlink,
- .mkdir = v9fs_vfs_mkdir_dotl,
- .rmdir = v9fs_vfs_rmdir,
- .mknod = v9fs_vfs_mknod_dotl,
- .rename = v9fs_vfs_rename,
- .getattr = v9fs_vfs_getattr_dotl,
- .setattr = v9fs_vfs_setattr_dotl,
- .setxattr = generic_setxattr,
- .getxattr = generic_getxattr,
- .removexattr = generic_removexattr,
- .listxattr = v9fs_listxattr,
-
-};
-
static const struct inode_operations v9fs_dir_inode_operations = {
.create = v9fs_vfs_create,
.lookup = v9fs_vfs_lookup,
@@ -1990,15 +1273,6 @@ static const struct inode_operations v9fs_file_inode_operations = {
.setattr = v9fs_vfs_setattr,
};
-static const struct inode_operations v9fs_file_inode_operations_dotl = {
- .getattr = v9fs_vfs_getattr_dotl,
- .setattr = v9fs_vfs_setattr_dotl,
- .setxattr = generic_setxattr,
- .getxattr = generic_getxattr,
- .removexattr = generic_removexattr,
- .listxattr = v9fs_listxattr,
-};
-
static const struct inode_operations v9fs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = v9fs_vfs_follow_link,
@@ -2007,14 +1281,3 @@ static const struct inode_operations v9fs_symlink_inode_operations = {
.setattr = v9fs_vfs_setattr,
};
-static const struct inode_operations v9fs_symlink_inode_operations_dotl = {
- .readlink = generic_readlink,
- .follow_link = v9fs_vfs_follow_link,
- .put_link = v9fs_vfs_put_link,
- .getattr = v9fs_vfs_getattr_dotl,
- .setattr = v9fs_vfs_setattr_dotl,
- .setxattr = generic_setxattr,
- .getxattr = generic_getxattr,
- .removexattr = generic_removexattr,
- .listxattr = v9fs_listxattr,
-};
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
new file mode 100644
index 0000000..fe3ffa9
--- /dev/null
+++ b/fs/9p/vfs_inode_dotl.c
@@ -0,0 +1,824 @@
+/*
+ * linux/fs/9p/vfs_inode_dotl.c
+ *
+ * This file contains vfs inode ops for the 9P2000.L protocol.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/inet.h>
+#include <linux/namei.h>
+#include <linux/idr.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/xattr.h>
+#include <linux/posix_acl.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+
+#include "v9fs.h"
+#include "v9fs_vfs.h"
+#include "fid.h"
+#include "cache.h"
+#include "xattr.h"
+#include "acl.h"
+
+static int
+v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
+ dev_t rdev);
+
+/**
+ * v9fs_get_fsgid_for_create - Helper function to get the gid for creating a
+ * new file system object. This checks the S_ISGID to determine the owning
+ * group of the new file system object.
+ */
+
+static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
+{
+ BUG_ON(dir_inode == NULL);
+
+ if (dir_inode->i_mode & S_ISGID) {
+ /* set_gid bit is set.*/
+ return dir_inode->i_gid;
+ }
+ return current_fsgid();
+}
+
+/**
+ * v9fs_dentry_from_dir_inode - helper function to get the dentry from
+ * dir inode.
+ *
+ */
+
+static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
+{
+ struct dentry *dentry;
+
+ spin_lock(&inode->i_lock);
+ /* Directory should have only one entry. */
+ BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
+ dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
+ spin_unlock(&inode->i_lock);
+ return dentry;
+}
+
+struct inode *
+v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
+{
+ struct inode *ret = NULL;
+ int err;
+ struct p9_stat_dotl *st;
+
+ st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
+ if (IS_ERR(st))
+ return ERR_CAST(st);
+
+ ret = v9fs_get_inode(sb, st->st_mode);
+ if (IS_ERR(ret)) {
+ err = PTR_ERR(ret);
+ goto error;
+ }
+
+ v9fs_stat2inode_dotl(st, ret);
+ ret->i_ino = v9fs_qid2ino(&st->qid);
+#ifdef CONFIG_9P_FSCACHE
+ v9fs_vcookie_set_qid(ret, &st->qid);
+ v9fs_cache_inode_get_cookie(ret);
+#endif
+ err = v9fs_get_acl(ret, fid);
+ if (err) {
+ iput(ret);
+ goto error;
+ }
+ kfree(st);
+ return ret;
+error:
+ kfree(st);
+ return ERR_PTR(err);
+}
+
+/**
+ * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
+ * @dir: directory inode that is being created
+ * @dentry: dentry that is being deleted
+ * @mode: create permissions
+ * @nd: path information
+ *
+ */
+
+static int
+v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
+ struct nameidata *nd)
+{
+ int err = 0;
+ char *name = NULL;
+ gid_t gid;
+ int flags;
+ mode_t mode;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid = NULL;
+ struct p9_fid *dfid, *ofid;
+ struct file *filp;
+ struct p9_qid qid;
+ struct inode *inode;
+ struct posix_acl *pacl = NULL, *dacl = NULL;
+
+ v9ses = v9fs_inode2v9ses(dir);
+ if (nd && nd->flags & LOOKUP_OPEN)
+ flags = nd->intent.open.flags - 1;
+ else {
+ /*
+ * create call without LOOKUP_OPEN is due
+ * to mknod of regular files. So use mknod
+ * operation.
+ */
+ return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
+ }
+
+ name = (char *) dentry->d_name.name;
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
+ "mode:0x%x\n", name, flags, omode);
+
+ dfid = v9fs_fid_lookup(dentry->d_parent);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ return err;
+ }
+
+ /* clone a fid to use for creation */
+ ofid = p9_client_walk(dfid, 0, NULL, 1);
+ if (IS_ERR(ofid)) {
+ err = PTR_ERR(ofid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+ return err;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+
+ mode = omode;
+ /* Update mode based on ACL value */
+ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
+ if (err) {
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "Failed to get acl values in creat %d\n", err);
+ goto error;
+ }
+ err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
+ if (err < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "p9_client_open_dotl failed in creat %d\n",
+ err);
+ goto error;
+ }
+
+ /* instantiate inode and assign the unopened fid to the dentry */
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+ fid = NULL;
+ goto error;
+ }
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
+ goto error;
+ }
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+
+ /* Now set the ACL based on the default value */
+ v9fs_set_create_acl(dentry, dacl, pacl);
+
+ /* Since we are opening a file, assign the open fid to the file */
+ filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
+ if (IS_ERR(filp)) {
+ p9_client_clunk(ofid);
+ return PTR_ERR(filp);
+ }
+ filp->private_data = ofid;
+ return 0;
+
+error:
+ if (ofid)
+ p9_client_clunk(ofid);
+ if (fid)
+ p9_client_clunk(fid);
+ return err;
+}
+
+/**
+ * v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory
+ * @dir: inode that is being unlinked
+ * @dentry: dentry that is being unlinked
+ * @mode: mode for new directory
+ *
+ */
+
+static int v9fs_vfs_mkdir_dotl(struct inode *dir,
+ struct dentry *dentry, int omode)
+{
+ int err;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid = NULL, *dfid = NULL;
+ gid_t gid;
+ char *name;
+ mode_t mode;
+ struct inode *inode;
+ struct p9_qid qid;
+ struct dentry *dir_dentry;
+ struct posix_acl *dacl = NULL, *pacl = NULL;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
+ err = 0;
+ v9ses = v9fs_inode2v9ses(dir);
+
+ omode |= S_IFDIR;
+ if (dir->i_mode & S_ISGID)
+ omode |= S_ISGID;
+
+ dir_dentry = v9fs_dentry_from_dir_inode(dir);
+ dfid = v9fs_fid_lookup(dir_dentry);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ dfid = NULL;
+ goto error;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+ mode = omode;
+ /* Update mode based on ACL value */
+ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
+ if (err) {
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "Failed to get acl values in mkdir %d\n", err);
+ goto error;
+ }
+ name = (char *) dentry->d_name.name;
+ err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
+ if (err < 0)
+ goto error;
+
+ /* instantiate inode and assign the unopened fid to the dentry */
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+ err);
+ fid = NULL;
+ goto error;
+ }
+
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+ err);
+ goto error;
+ }
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+ fid = NULL;
+ } else {
+ /*
+ * Not in cached mode. No need to populate
+ * inode with stat. We need to get an inode
+ * so that we can set the acl with dentry
+ */
+ inode = v9fs_get_inode(dir->i_sb, mode);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto error;
+ }
+ d_instantiate(dentry, inode);
+ }
+ /* Now set the ACL based on the default value */
+ v9fs_set_create_acl(dentry, dacl, pacl);
+
+error:
+ if (fid)
+ p9_client_clunk(fid);
+ return err;
+}
+
+static int
+v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat)
+{
+ int err;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid;
+ struct p9_stat_dotl *st;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
+ err = -EPERM;
+ v9ses = v9fs_inode2v9ses(dentry->d_inode);
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
+ return simple_getattr(mnt, dentry, stat);
+
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ /* Ask for all the fields in stat structure. Server will return
+ * whatever it supports
+ */
+
+ st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
+ if (IS_ERR(st))
+ return PTR_ERR(st);
+
+ v9fs_stat2inode_dotl(st, dentry->d_inode);
+ generic_fillattr(dentry->d_inode, stat);
+ /* Change block size to what the server returned */
+ stat->blksize = st->st_blksize;
+
+ kfree(st);
+ return 0;
+}
+
+/**
+ * v9fs_vfs_setattr_dotl - set file metadata
+ * @dentry: file whose metadata to set
+ * @iattr: metadata assignment structure
+ *
+ */
+
+int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
+{
+ int retval;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid;
+ struct p9_iattr_dotl p9attr;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "\n");
+
+ retval = inode_change_ok(dentry->d_inode, iattr);
+ if (retval)
+ return retval;
+
+ p9attr.valid = iattr->ia_valid;
+ p9attr.mode = iattr->ia_mode;
+ p9attr.uid = iattr->ia_uid;
+ p9attr.gid = iattr->ia_gid;
+ p9attr.size = iattr->ia_size;
+ p9attr.atime_sec = iattr->ia_atime.tv_sec;
+ p9attr.atime_nsec = iattr->ia_atime.tv_nsec;
+ p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
+ p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
+
+ retval = -EPERM;
+ v9ses = v9fs_inode2v9ses(dentry->d_inode);
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ retval = p9_client_setattr(fid, &p9attr);
+ if (retval < 0)
+ return retval;
+
+ if ((iattr->ia_valid & ATTR_SIZE) &&
+ iattr->ia_size != i_size_read(dentry->d_inode)) {
+ retval = vmtruncate(dentry->d_inode, iattr->ia_size);
+ if (retval)
+ return retval;
+ }
+
+ setattr_copy(dentry->d_inode, iattr);
+ mark_inode_dirty(dentry->d_inode);
+ if (iattr->ia_valid & ATTR_MODE) {
+ /* We also want to update ACL when we update mode bits */
+ retval = v9fs_acl_chmod(dentry);
+ if (retval < 0)
+ return retval;
+ }
+ return 0;
+}
+
+/**
+ * v9fs_stat2inode_dotl - populate an inode structure with stat info
+ * @stat: stat structure
+ * @inode: inode to populate
+ * @sb: superblock of filesystem
+ *
+ */
+
+void
+v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
+{
+
+ if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
+ inode->i_atime.tv_sec = stat->st_atime_sec;
+ inode->i_atime.tv_nsec = stat->st_atime_nsec;
+ inode->i_mtime.tv_sec = stat->st_mtime_sec;
+ inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
+ inode->i_ctime.tv_sec = stat->st_ctime_sec;
+ inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
+ inode->i_uid = stat->st_uid;
+ inode->i_gid = stat->st_gid;
+ inode->i_nlink = stat->st_nlink;
+ inode->i_mode = stat->st_mode;
+ inode->i_rdev = new_decode_dev(stat->st_rdev);
+
+ if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode)))
+ init_special_inode(inode, inode->i_mode, inode->i_rdev);
+
+ i_size_write(inode, stat->st_size);
+ inode->i_blocks = stat->st_blocks;
+ } else {
+ if (stat->st_result_mask & P9_STATS_ATIME) {
+ inode->i_atime.tv_sec = stat->st_atime_sec;
+ inode->i_atime.tv_nsec = stat->st_atime_nsec;
+ }
+ if (stat->st_result_mask & P9_STATS_MTIME) {
+ inode->i_mtime.tv_sec = stat->st_mtime_sec;
+ inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
+ }
+ if (stat->st_result_mask & P9_STATS_CTIME) {
+ inode->i_ctime.tv_sec = stat->st_ctime_sec;
+ inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
+ }
+ if (stat->st_result_mask & P9_STATS_UID)
+ inode->i_uid = stat->st_uid;
+ if (stat->st_result_mask & P9_STATS_GID)
+ inode->i_gid = stat->st_gid;
+ if (stat->st_result_mask & P9_STATS_NLINK)
+ inode->i_nlink = stat->st_nlink;
+ if (stat->st_result_mask & P9_STATS_MODE) {
+ inode->i_mode = stat->st_mode;
+ if ((S_ISBLK(inode->i_mode)) ||
+ (S_ISCHR(inode->i_mode)))
+ init_special_inode(inode, inode->i_mode,
+ inode->i_rdev);
+ }
+ if (stat->st_result_mask & P9_STATS_RDEV)
+ inode->i_rdev = new_decode_dev(stat->st_rdev);
+ if (stat->st_result_mask & P9_STATS_SIZE)
+ i_size_write(inode, stat->st_size);
+ if (stat->st_result_mask & P9_STATS_BLOCKS)
+ inode->i_blocks = stat->st_blocks;
+ }
+ if (stat->st_result_mask & P9_STATS_GEN)
+ inode->i_generation = stat->st_gen;
+
+ /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
+ * because the inode structure does not have fields for them.
+ */
+}
+
+static int
+v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
+ const char *symname)
+{
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *dfid;
+ struct p9_fid *fid = NULL;
+ struct inode *inode;
+ struct p9_qid qid;
+ char *name;
+ int err;
+ gid_t gid;
+
+ name = (char *) dentry->d_name.name;
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
+ dir->i_ino, name, symname);
+ v9ses = v9fs_inode2v9ses(dir);
+
+ dfid = v9fs_fid_lookup(dentry->d_parent);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ return err;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+
+ /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */
+ err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);
+
+ if (err < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err);
+ goto error;
+ }
+
+ if (v9ses->cache) {
+ /* Now walk from the parent so we can get an unopened fid. */
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+ err);
+ fid = NULL;
+ goto error;
+ }
+
+ /* instantiate inode and assign the unopened fid to dentry */
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+ err);
+ goto error;
+ }
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+ fid = NULL;
+ } else {
+ /* Not in cached mode. No need to populate inode with stat */
+ inode = v9fs_get_inode(dir->i_sb, S_IFLNK);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto error;
+ }
+ d_instantiate(dentry, inode);
+ }
+
+error:
+ if (fid)
+ p9_client_clunk(fid);
+
+ return err;
+}
+
+/**
+ * v9fs_vfs_link_dotl - create a hardlink for dotl
+ * @old_dentry: dentry for file to link to
+ * @dir: inode destination for new link
+ * @dentry: dentry for link
+ *
+ */
+
+static int
+v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
+ struct dentry *dentry)
+{
+ int err;
+ struct p9_fid *dfid, *oldfid;
+ char *name;
+ struct v9fs_session_info *v9ses;
+ struct dentry *dir_dentry;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
+ dir->i_ino, old_dentry->d_name.name,
+ dentry->d_name.name);
+
+ v9ses = v9fs_inode2v9ses(dir);
+ dir_dentry = v9fs_dentry_from_dir_inode(dir);
+ dfid = v9fs_fid_lookup(dir_dentry);
+ if (IS_ERR(dfid))
+ return PTR_ERR(dfid);
+
+ oldfid = v9fs_fid_lookup(old_dentry);
+ if (IS_ERR(oldfid))
+ return PTR_ERR(oldfid);
+
+ name = (char *) dentry->d_name.name;
+
+ err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name);
+
+ if (err < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_link failed %d\n", err);
+ return err;
+ }
+
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ /* Get the latest stat info from server. */
+ struct p9_fid *fid;
+ struct p9_stat_dotl *st;
+
+ fid = v9fs_fid_lookup(old_dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
+ if (IS_ERR(st))
+ return PTR_ERR(st);
+
+ v9fs_stat2inode_dotl(st, old_dentry->d_inode);
+
+ kfree(st);
+ } else {
+ /* Caching disabled. No need to get upto date stat info.
+ * This dentry will be released immediately. So, just hold the
+ * inode
+ */
+ ihold(old_dentry->d_inode);
+ }
+ d_instantiate(dentry, old_dentry->d_inode);
+
+ return err;
+}
+
+/**
+ * v9fs_vfs_mknod_dotl - create a special file
+ * @dir: inode destination for new link
+ * @dentry: dentry for file
+ * @mode: mode for creation
+ * @rdev: device associated with special file
+ *
+ */
+static int
+v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
+ dev_t rdev)
+{
+ int err;
+ char *name;
+ mode_t mode;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid = NULL, *dfid = NULL;
+ struct inode *inode;
+ gid_t gid;
+ struct p9_qid qid;
+ struct dentry *dir_dentry;
+ struct posix_acl *dacl = NULL, *pacl = NULL;
+
+ P9_DPRINTK(P9_DEBUG_VFS,
+ " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
+ dentry->d_name.name, omode, MAJOR(rdev), MINOR(rdev));
+
+ if (!new_valid_dev(rdev))
+ return -EINVAL;
+
+ v9ses = v9fs_inode2v9ses(dir);
+ dir_dentry = v9fs_dentry_from_dir_inode(dir);
+ dfid = v9fs_fid_lookup(dir_dentry);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ dfid = NULL;
+ goto error;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+ mode = omode;
+ /* Update mode based on ACL value */
+ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
+ if (err) {
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "Failed to get acl values in mknod %d\n", err);
+ goto error;
+ }
+ name = (char *) dentry->d_name.name;
+
+ err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid);
+ if (err < 0)
+ goto error;
+
+ /* instantiate inode and assign the unopened fid to the dentry */
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+ err);
+ fid = NULL;
+ goto error;
+ }
+
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+ err);
+ goto error;
+ }
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+ fid = NULL;
+ } else {
+ /*
+ * Not in cached mode. No need to populate inode with stat.
+ * socket syscall returns a fd, so we need instantiate
+ */
+ inode = v9fs_get_inode(dir->i_sb, mode);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto error;
+ }
+ d_instantiate(dentry, inode);
+ }
+ /* Now set the ACL based on the default value */
+ v9fs_set_create_acl(dentry, dacl, pacl);
+error:
+ if (fid)
+ p9_client_clunk(fid);
+ return err;
+}
+
+/**
+ * v9fs_vfs_follow_link_dotl - follow a symlink path
+ * @dentry: dentry for symlink
+ * @nd: nameidata
+ *
+ */
+
+static void *
+v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
+{
+ int retval;
+ struct p9_fid *fid;
+ char *link = __getname();
+ char *target;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "%s\n", dentry->d_name.name);
+
+ if (!link) {
+ link = ERR_PTR(-ENOMEM);
+ goto ndset;
+ }
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid)) {
+ __putname(link);
+ link = ERR_PTR(PTR_ERR(fid));
+ goto ndset;
+ }
+ retval = p9_client_readlink(fid, &target);
+ if (!retval) {
+ strcpy(link, target);
+ kfree(target);
+ goto ndset;
+ }
+ __putname(link);
+ link = ERR_PTR(retval);
+ndset:
+ nd_set_link(nd, link);
+ return NULL;
+}
+
+const struct inode_operations v9fs_dir_inode_operations_dotl = {
+ .create = v9fs_vfs_create_dotl,
+ .lookup = v9fs_vfs_lookup,
+ .link = v9fs_vfs_link_dotl,
+ .symlink = v9fs_vfs_symlink_dotl,
+ .unlink = v9fs_vfs_unlink,
+ .mkdir = v9fs_vfs_mkdir_dotl,
+ .rmdir = v9fs_vfs_rmdir,
+ .mknod = v9fs_vfs_mknod_dotl,
+ .rename = v9fs_vfs_rename,
+ .getattr = v9fs_vfs_getattr_dotl,
+ .setattr = v9fs_vfs_setattr_dotl,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
+ .listxattr = v9fs_listxattr,
+ .check_acl = v9fs_check_acl,
+};
+
+const struct inode_operations v9fs_file_inode_operations_dotl = {
+ .getattr = v9fs_vfs_getattr_dotl,
+ .setattr = v9fs_vfs_setattr_dotl,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
+ .listxattr = v9fs_listxattr,
+ .check_acl = v9fs_check_acl,
+};
+
+const struct inode_operations v9fs_symlink_inode_operations_dotl = {
+ .readlink = generic_readlink,
+ .follow_link = v9fs_vfs_follow_link_dotl,
+ .put_link = v9fs_vfs_put_link,
+ .getattr = v9fs_vfs_getattr_dotl,
+ .setattr = v9fs_vfs_setattr_dotl,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
+ .listxattr = v9fs_listxattr,
+};
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 1d12ba0..c55c614 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -39,6 +39,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/statfs.h>
+#include <linux/magic.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
@@ -46,6 +47,7 @@
#include "v9fs_vfs.h"
#include "fid.h"
#include "xattr.h"
+#include "acl.h"
static const struct super_operations v9fs_super_ops, v9fs_super_ops_dotl;
@@ -66,7 +68,7 @@ static int v9fs_set_super(struct super_block *s, void *data)
* v9fs_fill_super - populate superblock with info
* @sb: superblock
* @v9ses: session information
- * @flags: flags propagated from v9fs_get_sb()
+ * @flags: flags propagated from v9fs_mount()
*
*/
@@ -88,22 +90,25 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
MS_NOATIME;
+#ifdef CONFIG_9P_FS_POSIX_ACL
+ if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)
+ sb->s_flags |= MS_POSIXACL;
+#endif
+
save_mount_options(sb, data);
}
/**
- * v9fs_get_sb - mount a superblock
+ * v9fs_mount - mount a superblock
* @fs_type: file system type
* @flags: mount flags
* @dev_name: device name that was mounted
* @data: mount options
- * @mnt: mountpoint record to be instantiated
*
*/
-static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data,
- struct vfsmount *mnt)
+static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
{
struct super_block *sb = NULL;
struct inode *inode = NULL;
@@ -117,7 +122,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
if (!v9ses)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
fid = v9fs_session_init(v9ses, dev_name, data);
if (IS_ERR(fid)) {
@@ -149,7 +154,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
goto release_sb;
}
sb->s_root = root;
-
if (v9fs_proto_dotl(v9ses)) {
struct p9_stat_dotl *st = NULL;
st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
@@ -174,19 +178,21 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
p9stat_free(st);
kfree(st);
}
-
+ retval = v9fs_get_acl(inode, fid);
+ if (retval)
+ goto release_sb;
v9fs_fid_add(root, fid);
P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
- simple_set_mnt(mnt, sb);
- return 0;
+ return dget(sb->s_root);
clunk_fid:
p9_client_clunk(fid);
close_session:
v9fs_session_close(v9ses);
kfree(v9ses);
- return retval;
+ return ERR_PTR(retval);
+
release_sb:
/*
* we will do the session_close and root dentry release
@@ -196,7 +202,7 @@ release_sb:
*/
p9_client_clunk(fid);
deactivate_locked_super(sb);
- return retval;
+ return ERR_PTR(retval);
}
/**
@@ -249,7 +255,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
if (v9fs_proto_dotl(v9ses)) {
res = p9_client_statfs(fid, &rs);
if (res == 0) {
- buf->f_type = rs.type;
+ buf->f_type = V9FS_MAGIC;
buf->f_bsize = rs.bsize;
buf->f_blocks = rs.blocks;
buf->f_bfree = rs.bfree;
@@ -292,7 +298,7 @@ static const struct super_operations v9fs_super_ops_dotl = {
struct file_system_type v9fs_fs_type = {
.name = "9p",
- .get_sb = v9fs_get_sb,
+ .mount = v9fs_mount,
.kill_sb = v9fs_kill_super,
.owner = THIS_MODULE,
.fs_flags = FS_RENAME_DOES_D_MOVE,
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index f88e5c2..d288773 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -21,30 +21,13 @@
#include "fid.h"
#include "xattr.h"
-/*
- * v9fs_xattr_get()
- *
- * Copy an extended attribute into the buffer
- * provided, or compute the buffer size required.
- * Buffer is NULL to compute the size of the buffer required.
- *
- * Returns a negative error number on failure, or the number of bytes
- * used / required on success.
- */
-ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
- void *buffer, size_t buffer_size)
+ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
+ void *buffer, size_t buffer_size)
{
ssize_t retval;
int msize, read_count;
u64 offset = 0, attr_size;
- struct p9_fid *fid, *attr_fid;
-
- P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu\n",
- __func__, name, buffer_size);
-
- fid = v9fs_fid_lookup(dentry);
- if (IS_ERR(fid))
- return PTR_ERR(fid);
+ struct p9_fid *attr_fid;
attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
if (IS_ERR(attr_fid)) {
@@ -88,6 +71,31 @@ error:
}
+
+/*
+ * v9fs_xattr_get()
+ *
+ * Copy an extended attribute into the buffer
+ * provided, or compute the buffer size required.
+ * Buffer is NULL to compute the size of the buffer required.
+ *
+ * Returns a negative error number on failure, or the number of bytes
+ * used / required on success.
+ */
+ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t buffer_size)
+{
+ struct p9_fid *fid;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu\n",
+ __func__, name, buffer_size);
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ return v9fs_fid_xattr_get(fid, name, buffer, buffer_size);
+}
+
/*
* v9fs_xattr_set()
*
@@ -125,7 +133,7 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name,
"p9_client_xattrcreate failed %d\n", retval);
goto error;
}
- msize = fid->clnt->msize;;
+ msize = fid->clnt->msize;
while (value_len) {
if (value_len > (msize - P9_IOHDRSZ))
write_count = msize - P9_IOHDRSZ;
@@ -156,5 +164,9 @@ ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
const struct xattr_handler *v9fs_xattr_handlers[] = {
&v9fs_xattr_user_handler,
+#ifdef CONFIG_9P_FS_POSIX_ACL
+ &v9fs_xattr_acl_access_handler,
+ &v9fs_xattr_acl_default_handler,
+#endif
NULL
};
diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h
index 9ddf672..eaa837c 100644
--- a/fs/9p/xattr.h
+++ b/fs/9p/xattr.h
@@ -15,10 +15,16 @@
#define FS_9P_XATTR_H
#include <linux/xattr.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
extern const struct xattr_handler *v9fs_xattr_handlers[];
extern struct xattr_handler v9fs_xattr_user_handler;
+extern const struct xattr_handler v9fs_xattr_acl_access_handler;
+extern const struct xattr_handler v9fs_xattr_acl_default_handler;
+extern ssize_t v9fs_fid_xattr_get(struct p9_fid *, const char *,
+ void *, size_t);
extern ssize_t v9fs_xattr_get(struct dentry *, const char *,
void *, size_t);
extern int v9fs_xattr_set(struct dentry *, const char *,
diff --git a/fs/Kconfig b/fs/Kconfig
index 97673c9..771f457 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -62,7 +62,6 @@ source "fs/notify/Kconfig"
source "fs/quota/Kconfig"
-source "fs/autofs/Kconfig"
source "fs/autofs4/Kconfig"
source "fs/fuse/Kconfig"
@@ -234,7 +233,6 @@ config NFS_COMMON
default y
source "net/sunrpc/Kconfig"
-source "fs/smbfs/Kconfig"
source "fs/ceph/Kconfig"
source "fs/cifs/Kconfig"
source "fs/ncpfs/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index 26956fc..a7f7cef 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -88,7 +88,6 @@ obj-$(CONFIG_NFSD) += nfsd/
obj-$(CONFIG_LOCKD) += lockd/
obj-$(CONFIG_NLS) += nls/
obj-$(CONFIG_SYSV_FS) += sysv/
-obj-$(CONFIG_SMB_FS) += smbfs/
obj-$(CONFIG_CIFS) += cifs/
obj-$(CONFIG_NCP_FS) += ncpfs/
obj-$(CONFIG_HPFS_FS) += hpfs/
@@ -101,7 +100,6 @@ obj-$(CONFIG_UBIFS_FS) += ubifs/
obj-$(CONFIG_AFFS_FS) += affs/
obj-$(CONFIG_ROMFS_FS) += romfs/
obj-$(CONFIG_QNX4FS_FS) += qnx4/
-obj-$(CONFIG_AUTOFS_FS) += autofs/
obj-$(CONFIG_AUTOFS4_FS) += autofs4/
obj-$(CONFIG_ADFS_FS) += adfs/
obj-$(CONFIG_FUSE_FS) += fuse/
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index f4287e4..bf7693c 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -201,7 +201,8 @@ const struct file_operations adfs_dir_operations = {
};
static int
-adfs_hash(struct dentry *parent, struct qstr *qstr)
+adfs_hash(const struct dentry *parent, const struct inode *inode,
+ struct qstr *qstr)
{
const unsigned int name_len = ADFS_SB(parent->d_sb)->s_namelen;
const unsigned char *name;
@@ -237,17 +238,19 @@ adfs_hash(struct dentry *parent, struct qstr *qstr)
* requirements of the underlying filesystem.
*/
static int
-adfs_compare(struct dentry *parent, struct qstr *entry, struct qstr *name)
+adfs_compare(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
int i;
- if (entry->len != name->len)
+ if (len != name->len)
return 1;
for (i = 0; i < name->len; i++) {
char a, b;
- a = entry->name[i];
+ a = str[i];
b = name->name[i];
if (a >= 'A' && a <= 'Z')
@@ -273,7 +276,7 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
struct object_info obj;
int error;
- dentry->d_op = &adfs_dentry_operations;
+ d_set_d_op(dentry, &adfs_dentry_operations);
lock_kernel();
error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj);
if (error == 0) {
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index d9803f7..a4041b5 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -240,11 +240,18 @@ static struct inode *adfs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void adfs_destroy_inode(struct inode *inode)
+static void adfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
}
+static void adfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, adfs_i_callback);
+}
+
static void init_once(void *foo)
{
struct adfs_inode_info *ei = (struct adfs_inode_info *) foo;
@@ -477,7 +484,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
adfs_error(sb, "get root inode failed\n");
goto error;
} else
- sb->s_root->d_op = &adfs_dentry_operations;
+ d_set_d_op(sb->s_root, &adfs_dentry_operations);
unlock_kernel();
return 0;
@@ -490,17 +497,16 @@ error:
return -EINVAL;
}
-static int adfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *adfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, adfs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, adfs_fill_super);
}
static struct file_system_type adfs_fs_type = {
.owner = THIS_MODULE,
.name = "adfs",
- .get_sb = adfs_get_sb,
+ .mount = adfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 7d0f0a3..3a4557e 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -128,7 +128,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
void *data = dentry->d_fsdata;
struct list_head *head, *next;
- spin_lock(&dcache_lock);
+ spin_lock(&inode->i_lock);
head = &inode->i_dentry;
next = head->next;
while (next != head) {
@@ -139,7 +139,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
}
next = next->next;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
}
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 914d1c0..944a404 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -13,11 +13,19 @@
typedef int (*toupper_t)(int);
static int affs_toupper(int ch);
-static int affs_hash_dentry(struct dentry *, struct qstr *);
-static int affs_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
+static int affs_hash_dentry(const struct dentry *,
+ const struct inode *, struct qstr *);
+static int affs_compare_dentry(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name);
static int affs_intl_toupper(int ch);
-static int affs_intl_hash_dentry(struct dentry *, struct qstr *);
-static int affs_intl_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
+static int affs_intl_hash_dentry(const struct dentry *,
+ const struct inode *, struct qstr *);
+static int affs_intl_compare_dentry(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name);
const struct dentry_operations affs_dentry_operations = {
.d_hash = affs_hash_dentry,
@@ -58,13 +66,13 @@ affs_get_toupper(struct super_block *sb)
* Note: the dentry argument is the parent dentry.
*/
static inline int
-__affs_hash_dentry(struct dentry *dentry, struct qstr *qstr, toupper_t toupper)
+__affs_hash_dentry(struct qstr *qstr, toupper_t toupper)
{
const u8 *name = qstr->name;
unsigned long hash;
int i;
- i = affs_check_name(qstr->name,qstr->len);
+ i = affs_check_name(qstr->name, qstr->len);
if (i)
return i;
@@ -78,39 +86,41 @@ __affs_hash_dentry(struct dentry *dentry, struct qstr *qstr, toupper_t toupper)
}
static int
-affs_hash_dentry(struct dentry *dentry, struct qstr *qstr)
+affs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
- return __affs_hash_dentry(dentry, qstr, affs_toupper);
+ return __affs_hash_dentry(qstr, affs_toupper);
}
static int
-affs_intl_hash_dentry(struct dentry *dentry, struct qstr *qstr)
+affs_intl_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
- return __affs_hash_dentry(dentry, qstr, affs_intl_toupper);
+ return __affs_hash_dentry(qstr, affs_intl_toupper);
}
-static inline int
-__affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b, toupper_t toupper)
+static inline int __affs_compare_dentry(unsigned int len,
+ const char *str, const struct qstr *name, toupper_t toupper)
{
- const u8 *aname = a->name;
- const u8 *bname = b->name;
- int len;
+ const u8 *aname = str;
+ const u8 *bname = name->name;
- /* 'a' is the qstr of an already existing dentry, so the name
- * must be valid. 'b' must be validated first.
+ /*
+ * 'str' is the name of an already existing dentry, so the name
+ * must be valid. 'name' must be validated first.
*/
- if (affs_check_name(b->name,b->len))
+ if (affs_check_name(name->name, name->len))
return 1;
- /* If the names are longer than the allowed 30 chars,
+ /*
+ * If the names are longer than the allowed 30 chars,
* the excess is ignored, so their length may differ.
*/
- len = a->len;
if (len >= 30) {
- if (b->len < 30)
+ if (name->len < 30)
return 1;
len = 30;
- } else if (len != b->len)
+ } else if (len != name->len)
return 1;
for (; len > 0; len--)
@@ -121,14 +131,18 @@ __affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b, tou
}
static int
-affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
+affs_compare_dentry(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- return __affs_compare_dentry(dentry, a, b, affs_toupper);
+ return __affs_compare_dentry(len, str, name, affs_toupper);
}
static int
-affs_intl_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
+affs_intl_compare_dentry(const struct dentry *parent,const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- return __affs_compare_dentry(dentry, a, b, affs_intl_toupper);
+ return __affs_compare_dentry(len, str, name, affs_intl_toupper);
}
/*
@@ -226,7 +240,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
if (IS_ERR(inode))
return ERR_CAST(inode);
}
- dentry->d_op = AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations;
+ d_set_d_op(dentry, AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations);
d_add(dentry, inode);
return NULL;
}
diff --git a/fs/affs/super.c b/fs/affs/super.c
index fa4fbe1..d39081b 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -95,11 +95,18 @@ static struct inode *affs_alloc_inode(struct super_block *sb)
return &i->vfs_inode;
}
-static void affs_destroy_inode(struct inode *inode)
+static void affs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
}
+static void affs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, affs_i_callback);
+}
+
static void init_once(void *foo)
{
struct affs_inode_info *ei = (struct affs_inode_info *) foo;
@@ -475,7 +482,7 @@ got_root:
printk(KERN_ERR "AFFS: Get root inode failed\n");
goto out_error;
}
- sb->s_root->d_op = &affs_dentry_operations;
+ d_set_d_op(sb->s_root, &affs_dentry_operations);
pr_debug("AFFS: s_flags=%lX\n",sb->s_flags);
return 0;
@@ -573,17 +580,16 @@ affs_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
-static int affs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *affs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, affs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, affs_fill_super);
}
static struct file_system_type affs_fs_type = {
.owner = THIS_MODULE,
.name = "affs",
- .get_sb = affs_get_sb,
+ .mount = affs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 5439e1b..34a3263 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
+#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/ctype.h>
#include <linux/sched.h>
@@ -23,7 +24,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
static int afs_dir_open(struct inode *inode, struct file *file);
static int afs_readdir(struct file *file, void *dirent, filldir_t filldir);
static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd);
-static int afs_d_delete(struct dentry *dentry);
+static int afs_d_delete(const struct dentry *dentry);
static void afs_d_release(struct dentry *dentry);
static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
loff_t fpos, u64 ino, unsigned dtype);
@@ -581,7 +582,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
}
success:
- dentry->d_op = &afs_fs_dentry_operations;
+ d_set_d_op(dentry, &afs_fs_dentry_operations);
d_add(dentry, inode);
_leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }",
@@ -607,6 +608,9 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
void *dir_version;
int ret;
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
vnode = AFS_FS_I(dentry->d_inode);
if (dentry->d_inode)
@@ -730,7 +734,7 @@ out_bad:
* - called from dput() when d_count is going to 0.
* - return 1 to request dentry be unhashed, 0 otherwise
*/
-static int afs_d_delete(struct dentry *dentry)
+static int afs_d_delete(const struct dentry *dentry)
{
_enter("%s", dentry->d_name.name);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index cca8eef..6d4bc1c 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -624,7 +624,7 @@ extern void afs_clear_permits(struct afs_vnode *);
extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
extern void afs_zap_permits(struct rcu_head *);
extern struct key *afs_request_key(struct afs_cell *);
-extern int afs_permission(struct inode *, int);
+extern int afs_permission(struct inode *, int, unsigned int);
/*
* server.c
diff --git a/fs/afs/security.c b/fs/afs/security.c
index bb4ed14..f44b9d3 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -285,13 +285,16 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
* - AFS ACLs are attached to directories only, and a file is controlled by its
* parent directory's ACL
*/
-int afs_permission(struct inode *inode, int mask)
+int afs_permission(struct inode *inode, int mask, unsigned int flags)
{
struct afs_vnode *vnode = AFS_FS_I(inode);
afs_access_t uninitialized_var(access);
struct key *key;
int ret;
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
_enter("{{%x:%u},%lx},%x,",
vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask);
@@ -347,7 +350,7 @@ int afs_permission(struct inode *inode, int mask)
}
key_put(key);
- ret = generic_permission(inode, mask, NULL);
+ ret = generic_permission(inode, mask, flags, NULL);
_leave(" = %d", ret);
return ret;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index eacf76d..f901a9d 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -29,9 +29,8 @@
#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
static void afs_i_init_once(void *foo);
-static int afs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name,
- void *data, struct vfsmount *mnt);
+static struct dentry *afs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data);
static struct inode *afs_alloc_inode(struct super_block *sb);
static void afs_put_super(struct super_block *sb);
static void afs_destroy_inode(struct inode *inode);
@@ -40,7 +39,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
struct file_system_type afs_fs_type = {
.owner = THIS_MODULE,
.name = "afs",
- .get_sb = afs_get_sb,
+ .mount = afs_mount,
.kill_sb = kill_anon_super,
.fs_flags = 0,
};
@@ -359,11 +358,8 @@ error:
/*
* get an AFS superblock
*/
-static int afs_get_sb(struct file_system_type *fs_type,
- int flags,
- const char *dev_name,
- void *options,
- struct vfsmount *mnt)
+static struct dentry *afs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *options)
{
struct afs_mount_params params;
struct super_block *sb;
@@ -427,12 +423,11 @@ static int afs_get_sb(struct file_system_type *fs_type,
ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
}
- simple_set_mnt(mnt, sb);
afs_put_volume(params.volume);
afs_put_cell(params.cell);
kfree(new_opts);
_leave(" = 0 [%p]", sb);
- return 0;
+ return dget(sb->s_root);
error:
afs_put_volume(params.volume);
@@ -440,7 +435,7 @@ error:
key_put(params.key);
kfree(new_opts);
_leave(" = %d", ret);
- return ret;
+ return ERR_PTR(ret);
}
/*
@@ -503,6 +498,14 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
return &vnode->vfs_inode;
}
+static void afs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(afs_inode_cachep, vnode);
+}
+
/*
* destroy an AFS inode struct
*/
@@ -516,7 +519,7 @@ static void afs_destroy_inode(struct inode *inode)
ASSERTCMP(vnode->server, ==, NULL);
- kmem_cache_free(afs_inode_cachep, vnode);
+ call_rcu(&inode->i_rcu, afs_i_callback);
atomic_dec(&afs_count_active_inodes);
}
diff --git a/fs/aio.c b/fs/aio.c
index 8c8f6c5..5e00f15 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -798,29 +798,12 @@ static void aio_queue_work(struct kioctx * ctx)
queue_delayed_work(aio_wq, &ctx->wq, timeout);
}
-
-/*
- * aio_run_iocbs:
- * Process all pending retries queued on the ioctx
- * run list.
- * Assumes it is operating within the aio issuer's mm
- * context.
- */
-static inline void aio_run_iocbs(struct kioctx *ctx)
-{
- int requeue;
-
- spin_lock_irq(&ctx->ctx_lock);
-
- requeue = __aio_run_iocbs(ctx);
- spin_unlock_irq(&ctx->ctx_lock);
- if (requeue)
- aio_queue_work(ctx);
-}
-
/*
- * just like aio_run_iocbs, but keeps running them until
- * the list stays empty
+ * aio_run_all_iocbs:
+ * Process all pending retries queued on the ioctx
+ * run list, and keep running them until the list
+ * stays empty.
+ * Assumes it is operating within the aio issuer's mm context.
*/
static inline void aio_run_all_iocbs(struct kioctx *ctx)
{
@@ -1839,7 +1822,7 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
long ret = -EINVAL;
if (likely(ioctx)) {
- if (likely(min_nr <= nr && min_nr >= 0 && nr >= 0))
+ if (likely(min_nr <= nr && min_nr >= 0))
ret = read_events(ioctx, min_nr, nr, events, timeout);
put_ioctx(ioctx);
}
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 5365527..98edb65 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -26,12 +26,10 @@ static struct vfsmount *anon_inode_mnt __read_mostly;
static struct inode *anon_inode_inode;
static const struct file_operations anon_inode_fops;
-static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data,
- struct vfsmount *mnt)
+static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC,
- mnt);
+ return mount_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC);
}
/*
@@ -45,7 +43,7 @@ static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
static struct file_system_type anon_inode_fs_type = {
.name = "anon_inodefs",
- .get_sb = anon_inodefs_get_sb,
+ .mount = anon_inodefs_mount,
.kill_sb = kill_anon_super,
};
static const struct dentry_operations anon_inodefs_dentry_operations = {
@@ -66,9 +64,9 @@ static const struct address_space_operations anon_aops = {
};
/**
- * anon_inode_getfd - creates a new file instance by hooking it up to an
- * anonymous inode, and a dentry that describe the "class"
- * of the file
+ * anon_inode_getfile - creates a new file instance by hooking it up to an
+ * anonymous inode, and a dentry that describe the "class"
+ * of the file
*
* @name: [in] name of the "class" of the new file
* @fops: [in] file operations for the new file
@@ -104,7 +102,7 @@ struct file *anon_inode_getfile(const char *name,
this.name = name;
this.len = strlen(name);
this.hash = 0;
- path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
+ path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this);
if (!path.dentry)
goto err_module;
@@ -115,7 +113,7 @@ struct file *anon_inode_getfile(const char *name,
*/
ihold(anon_inode_inode);
- path.dentry->d_op = &anon_inodefs_dentry_operations;
+ d_set_d_op(path.dentry, &anon_inodefs_dentry_operations);
d_instantiate(path.dentry, anon_inode_inode);
error = -ENFILE;
@@ -234,7 +232,7 @@ static int __init anon_inode_init(void)
return 0;
err_mntput:
- mntput(anon_inode_mnt);
+ mntput_long(anon_inode_mnt);
err_unregister_filesystem:
unregister_filesystem(&anon_inode_fs_type);
err_exit:
diff --git a/fs/autofs/Kconfig b/fs/autofs/Kconfig
deleted file mode 100644
index 480e210..0000000
--- a/fs/autofs/Kconfig
+++ /dev/null
@@ -1,22 +0,0 @@
-config AUTOFS_FS
- tristate "Kernel automounter support"
- depends on BKL # unfixable, just use autofs4
- help
- The automounter is a tool to automatically mount remote file systems
- on demand. This implementation is partially kernel-based to reduce
- overhead in the already-mounted case; this is unlike the BSD
- automounter (amd), which is a pure user space daemon.
-
- To use the automounter you need the user-space tools from the autofs
- package; you can find the location in <file:Documentation/Changes>.
- You also want to answer Y to "NFS file system support", below.
-
- If you want to use the newer version of the automounter with more
- features, say N here and say Y to "Kernel automounter v4 support",
- below.
-
- To compile this support as a module, choose M here: the module will be
- called autofs.
-
- If you are not a part of a fairly large, distributed network, you
- probably do not need an automounter, and can say N here.
diff --git a/fs/autofs/Makefile b/fs/autofs/Makefile
deleted file mode 100644
index 453a60f..0000000
--- a/fs/autofs/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# Makefile for the linux autofs-filesystem routines.
-#
-
-obj-$(CONFIG_AUTOFS_FS) += autofs.o
-
-autofs-objs := dirhash.o init.o inode.o root.o symlink.o waitq.o
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
deleted file mode 100644
index 901a3e6..0000000
--- a/fs/autofs/autofs_i.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/* -*- linux-c -*- ------------------------------------------------------- *
- *
- * linux/fs/autofs/autofs_i.h
- *
- * Copyright 1997-1998 Transmeta Corporation - All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ----------------------------------------------------------------------- */
-
-/* Internal header file for autofs */
-
-#include <linux/auto_fs.h>
-
-/* This is the range of ioctl() numbers we claim as ours */
-#define AUTOFS_IOC_FIRST AUTOFS_IOC_READY
-#define AUTOFS_IOC_COUNT 32
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/time.h>
-#include <linux/string.h>
-#include <linux/wait.h>
-#include <linux/dcache.h>
-#include <linux/namei.h>
-#include <linux/mount.h>
-#include <linux/sched.h>
-
-#include <asm/current.h>
-#include <asm/uaccess.h>
-
-#ifdef DEBUG
-#define DPRINTK(D) (printk D)
-#else
-#define DPRINTK(D) ((void)0)
-#endif
-
-/*
- * If the daemon returns a negative response (AUTOFS_IOC_FAIL) then the
- * kernel will keep the negative response cached for up to the time given
- * here, although the time can be shorter if the kernel throws the dcache
- * entry away. This probably should be settable from user space.
- */
-#define AUTOFS_NEGATIVE_TIMEOUT (60*HZ) /* 1 minute */
-
-/* Structures associated with the root directory hash table */
-
-#define AUTOFS_HASH_SIZE 67
-
-struct autofs_dir_ent {
- int hash;
- char *name;
- int len;
- ino_t ino;
- struct dentry *dentry;
- /* Linked list of entries */
- struct autofs_dir_ent *next;
- struct autofs_dir_ent **back;
- /* The following entries are for the expiry system */
- unsigned long last_usage;
- struct list_head exp;
-};
-
-struct autofs_dirhash {
- struct autofs_dir_ent *h[AUTOFS_HASH_SIZE];
- struct list_head expiry_head;
-};
-
-struct autofs_wait_queue {
- wait_queue_head_t queue;
- struct autofs_wait_queue *next;
- autofs_wqt_t wait_queue_token;
- /* We use the following to see what we are waiting for */
- int hash;
- int len;
- char *name;
- /* This is for status reporting upon return */
- int status;
- int wait_ctr;
-};
-
-struct autofs_symlink {
- char *data;
- int len;
- time_t mtime;
-};
-
-#define AUTOFS_MAX_SYMLINKS 256
-
-#define AUTOFS_ROOT_INO 1
-#define AUTOFS_FIRST_SYMLINK 2
-#define AUTOFS_FIRST_DIR_INO (AUTOFS_FIRST_SYMLINK+AUTOFS_MAX_SYMLINKS)
-
-#define AUTOFS_SYMLINK_BITMAP_LEN \
- ((AUTOFS_MAX_SYMLINKS+((sizeof(long)*1)-1))/(sizeof(long)*8))
-
-#define AUTOFS_SBI_MAGIC 0x6d4a556d
-
-struct autofs_sb_info {
- u32 magic;
- struct file *pipe;
- struct pid *oz_pgrp;
- int catatonic;
- struct super_block *sb;
- unsigned long exp_timeout;
- ino_t next_dir_ino;
- struct autofs_wait_queue *queues; /* Wait queue pointer */
- struct autofs_dirhash dirhash; /* Root directory hash */
- struct autofs_symlink symlink[AUTOFS_MAX_SYMLINKS];
- unsigned long symlink_bitmap[AUTOFS_SYMLINK_BITMAP_LEN];
-};
-
-static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb)
-{
- return (struct autofs_sb_info *)(sb->s_fs_info);
-}
-
-/* autofs_oz_mode(): do we see the man behind the curtain? (The
- processes which do manipulations for us in user space sees the raw
- filesystem without "magic".) */
-
-static inline int autofs_oz_mode(struct autofs_sb_info *sbi) {
- return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
-}
-
-/* Hash operations */
-
-void autofs_initialize_hash(struct autofs_dirhash *);
-struct autofs_dir_ent *autofs_hash_lookup(const struct autofs_dirhash *,struct qstr *);
-void autofs_hash_insert(struct autofs_dirhash *,struct autofs_dir_ent *);
-void autofs_hash_delete(struct autofs_dir_ent *);
-struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *,off_t *,struct autofs_dir_ent *);
-void autofs_hash_dputall(struct autofs_dirhash *);
-void autofs_hash_nuke(struct autofs_sb_info *);
-
-/* Expiration-handling functions */
-
-void autofs_update_usage(struct autofs_dirhash *,struct autofs_dir_ent *);
-struct autofs_dir_ent *autofs_expire(struct super_block *,struct autofs_sb_info *, struct vfsmount *mnt);
-
-/* Operations structures */
-
-extern const struct inode_operations autofs_root_inode_operations;
-extern const struct inode_operations autofs_symlink_inode_operations;
-extern const struct file_operations autofs_root_operations;
-
-/* Initializing function */
-
-int autofs_fill_super(struct super_block *, void *, int);
-void autofs_kill_sb(struct super_block *sb);
-struct inode *autofs_iget(struct super_block *, unsigned long);
-
-/* Queue management functions */
-
-int autofs_wait(struct autofs_sb_info *,struct qstr *);
-int autofs_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
-void autofs_catatonic_mode(struct autofs_sb_info *);
-
-#ifdef DEBUG
-void autofs_say(const char *name, int len);
-#else
-#define autofs_say(n,l) ((void)0)
-#endif
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
deleted file mode 100644
index e947915..0000000
--- a/fs/autofs/dirhash.c
+++ /dev/null
@@ -1,250 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/autofs/dirhash.c
- *
- * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include "autofs_i.h"
-
-/* Functions for maintenance of expiry queue */
-
-static void autofs_init_usage(struct autofs_dirhash *dh,
- struct autofs_dir_ent *ent)
-{
- list_add_tail(&ent->exp, &dh->expiry_head);
- ent->last_usage = jiffies;
-}
-
-static void autofs_delete_usage(struct autofs_dir_ent *ent)
-{
- list_del(&ent->exp);
-}
-
-void autofs_update_usage(struct autofs_dirhash *dh,
- struct autofs_dir_ent *ent)
-{
- autofs_delete_usage(ent); /* Unlink from current position */
- autofs_init_usage(dh,ent); /* Relink at queue tail */
-}
-
-struct autofs_dir_ent *autofs_expire(struct super_block *sb,
- struct autofs_sb_info *sbi,
- struct vfsmount *mnt)
-{
- struct autofs_dirhash *dh = &sbi->dirhash;
- struct autofs_dir_ent *ent;
- unsigned long timeout = sbi->exp_timeout;
-
- while (1) {
- struct path path;
- int umount_ok;
-
- if ( list_empty(&dh->expiry_head) || sbi->catatonic )
- return NULL; /* No entries */
- /* We keep the list sorted by last_usage and want old stuff */
- ent = list_entry(dh->expiry_head.next, struct autofs_dir_ent, exp);
- if (jiffies - ent->last_usage < timeout)
- break;
- /* Move to end of list in case expiry isn't desirable */
- autofs_update_usage(dh, ent);
-
- /* Check to see that entry is expirable */
- if ( ent->ino < AUTOFS_FIRST_DIR_INO )
- return ent; /* Symlinks are always expirable */
-
- /* Get the dentry for the autofs subdirectory */
- path.dentry = ent->dentry;
-
- if (!path.dentry) {
- /* Should only happen in catatonic mode */
- printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name);
- autofs_delete_usage(ent);
- continue;
- }
-
- if (!path.dentry->d_inode) {
- dput(path.dentry);
- printk("autofs: negative dentry on expiry queue: %s\n",
- ent->name);
- autofs_delete_usage(ent);
- continue;
- }
-
- /* Make sure entry is mounted and unused; note that dentry will
- point to the mounted-on-top root. */
- if (!S_ISDIR(path.dentry->d_inode->i_mode) ||
- !d_mountpoint(path.dentry)) {
- DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
- continue;
- }
- path.mnt = mnt;
- path_get(&path);
- if (!follow_down(&path)) {
- path_put(&path);
- DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
- continue;
- }
- while (d_mountpoint(path.dentry) && follow_down(&path))
- ;
- umount_ok = may_umount(path.mnt);
- path_put(&path);
-
- if (umount_ok) {
- DPRINTK(("autofs: signaling expire on %s\n", ent->name));
- return ent; /* Expirable! */
- }
- DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name));
- }
- return NULL; /* No expirable entries */
-}
-
-void autofs_initialize_hash(struct autofs_dirhash *dh) {
- memset(&dh->h, 0, AUTOFS_HASH_SIZE*sizeof(struct autofs_dir_ent *));
- INIT_LIST_HEAD(&dh->expiry_head);
-}
-
-struct autofs_dir_ent *autofs_hash_lookup(const struct autofs_dirhash *dh, struct qstr *name)
-{
- struct autofs_dir_ent *dhn;
-
- DPRINTK(("autofs_hash_lookup: hash = 0x%08x, name = ", name->hash));
- autofs_say(name->name,name->len);
-
- for ( dhn = dh->h[(unsigned) name->hash % AUTOFS_HASH_SIZE] ; dhn ; dhn = dhn->next ) {
- if ( name->hash == dhn->hash &&
- name->len == dhn->len &&
- !memcmp(name->name, dhn->name, name->len) )
- break;
- }
-
- return dhn;
-}
-
-void autofs_hash_insert(struct autofs_dirhash *dh, struct autofs_dir_ent *ent)
-{
- struct autofs_dir_ent **dhnp;
-
- DPRINTK(("autofs_hash_insert: hash = 0x%08x, name = ", ent->hash));
- autofs_say(ent->name,ent->len);
-
- autofs_init_usage(dh,ent);
- if (ent->dentry)
- dget(ent->dentry);
-
- dhnp = &dh->h[(unsigned) ent->hash % AUTOFS_HASH_SIZE];
- ent->next = *dhnp;
- ent->back = dhnp;
- *dhnp = ent;
- if ( ent->next )
- ent->next->back = &(ent->next);
-}
-
-void autofs_hash_delete(struct autofs_dir_ent *ent)
-{
- *(ent->back) = ent->next;
- if ( ent->next )
- ent->next->back = ent->back;
-
- autofs_delete_usage(ent);
-
- if ( ent->dentry )
- dput(ent->dentry);
- kfree(ent->name);
- kfree(ent);
-}
-
-/*
- * Used by readdir(). We must validate "ptr", so we can't simply make it
- * a pointer. Values below 0xffff are reserved; calling with any value
- * <= 0x10000 will return the first entry found.
- *
- * "last" can be NULL or the value returned by the last search *if* we
- * want the next sequential entry.
- */
-struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *dh,
- off_t *ptr, struct autofs_dir_ent *last)
-{
- int bucket, ecount, i;
- struct autofs_dir_ent *ent;
-
- bucket = (*ptr >> 16) - 1;
- ecount = *ptr & 0xffff;
-
- if ( bucket < 0 ) {
- bucket = ecount = 0;
- }
-
- DPRINTK(("autofs_hash_enum: bucket %d, entry %d\n", bucket, ecount));
-
- ent = last ? last->next : NULL;
-
- if ( ent ) {
- ecount++;
- } else {
- while ( bucket < AUTOFS_HASH_SIZE ) {
- ent = dh->h[bucket];
- for ( i = ecount ; ent && i ; i-- )
- ent = ent->next;
-
- if (ent) {
- ecount++; /* Point to *next* entry */
- break;
- }
-
- bucket++; ecount = 0;
- }
- }
-
-#ifdef DEBUG
- if ( !ent )
- printk("autofs_hash_enum: nothing found\n");
- else {
- printk("autofs_hash_enum: found hash %08x, name", ent->hash);
- autofs_say(ent->name,ent->len);
- }
-#endif
-
- *ptr = ((bucket+1) << 16) + ecount;
- return ent;
-}
-
-/* Iterate over all the ents, and remove all dentry pointers. Used on
- entering catatonic mode, in order to make the filesystem unmountable. */
-void autofs_hash_dputall(struct autofs_dirhash *dh)
-{
- int i;
- struct autofs_dir_ent *ent;
-
- for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) {
- for ( ent = dh->h[i] ; ent ; ent = ent->next ) {
- if ( ent->dentry ) {
- dput(ent->dentry);
- ent->dentry = NULL;
- }
- }
- }
-}
-
-/* Delete everything. This is used on filesystem destruction, so we
- make no attempt to keep the pointers valid */
-void autofs_hash_nuke(struct autofs_sb_info *sbi)
-{
- int i;
- struct autofs_dir_ent *ent, *nent;
-
- for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) {
- for ( ent = sbi->dirhash.h[i] ; ent ; ent = nent ) {
- nent = ent->next;
- if ( ent->dentry )
- dput(ent->dentry);
- kfree(ent->name);
- kfree(ent);
- }
- }
-}
diff --git a/fs/autofs/init.c b/fs/autofs/init.c
deleted file mode 100644
index cea5219..0000000
--- a/fs/autofs/init.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/autofs/init.c
- *
- * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include "autofs_i.h"
-
-static int autofs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
-{
- return get_sb_nodev(fs_type, flags, data, autofs_fill_super, mnt);
-}
-
-static struct file_system_type autofs_fs_type = {
- .owner = THIS_MODULE,
- .name = "autofs",
- .get_sb = autofs_get_sb,
- .kill_sb = autofs_kill_sb,
-};
-
-static int __init init_autofs_fs(void)
-{
- return register_filesystem(&autofs_fs_type);
-}
-
-static void __exit exit_autofs_fs(void)
-{
- unregister_filesystem(&autofs_fs_type);
-}
-
-module_init(init_autofs_fs);
-module_exit(exit_autofs_fs);
-
-#ifdef DEBUG
-void autofs_say(const char *name, int len)
-{
- printk("(%d: ", len);
- while ( len-- )
- printk("%c", *name++);
- printk(")\n");
-}
-#endif
-MODULE_LICENSE("GPL");
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
deleted file mode 100644
index e1734f2..0000000
--- a/fs/autofs/inode.c
+++ /dev/null
@@ -1,288 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/autofs/inode.c
- *
- * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/file.h>
-#include <linux/parser.h>
-#include <linux/bitops.h>
-#include <linux/magic.h>
-#include "autofs_i.h"
-#include <linux/module.h>
-
-void autofs_kill_sb(struct super_block *sb)
-{
- struct autofs_sb_info *sbi = autofs_sbi(sb);
- unsigned int n;
-
- /*
- * In the event of a failure in get_sb_nodev the superblock
- * info is not present so nothing else has been setup, so
- * just call kill_anon_super when we are called from
- * deactivate_super.
- */
- if (!sbi)
- goto out_kill_sb;
-
- if (!sbi->catatonic)
- autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */
-
- put_pid(sbi->oz_pgrp);
-
- autofs_hash_nuke(sbi);
- for (n = 0; n < AUTOFS_MAX_SYMLINKS; n++) {
- if (test_bit(n, sbi->symlink_bitmap))
- kfree(sbi->symlink[n].data);
- }
-
- kfree(sb->s_fs_info);
-
-out_kill_sb:
- DPRINTK(("autofs: shutting down\n"));
- kill_anon_super(sb);
-}
-
-static const struct super_operations autofs_sops = {
- .statfs = simple_statfs,
- .show_options = generic_show_options,
-};
-
-enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto};
-
-static const match_table_t autofs_tokens = {
- {Opt_fd, "fd=%u"},
- {Opt_uid, "uid=%u"},
- {Opt_gid, "gid=%u"},
- {Opt_pgrp, "pgrp=%u"},
- {Opt_minproto, "minproto=%u"},
- {Opt_maxproto, "maxproto=%u"},
- {Opt_err, NULL}
-};
-
-static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
- pid_t *pgrp, int *minproto, int *maxproto)
-{
- char *p;
- substring_t args[MAX_OPT_ARGS];
- int option;
-
- *uid = current_uid();
- *gid = current_gid();
- *pgrp = task_pgrp_nr(current);
-
- *minproto = *maxproto = AUTOFS_PROTO_VERSION;
-
- *pipefd = -1;
-
- if (!options)
- return 1;
-
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
- if (!*p)
- continue;
-
- token = match_token(p, autofs_tokens, args);
- switch (token) {
- case Opt_fd:
- if (match_int(&args[0], &option))
- return 1;
- *pipefd = option;
- break;
- case Opt_uid:
- if (match_int(&args[0], &option))
- return 1;
- *uid = option;
- break;
- case Opt_gid:
- if (match_int(&args[0], &option))
- return 1;
- *gid = option;
- break;
- case Opt_pgrp:
- if (match_int(&args[0], &option))
- return 1;
- *pgrp = option;
- break;
- case Opt_minproto:
- if (match_int(&args[0], &option))
- return 1;
- *minproto = option;
- break;
- case Opt_maxproto:
- if (match_int(&args[0], &option))
- return 1;
- *maxproto = option;
- break;
- default:
- return 1;
- }
- }
- return (*pipefd < 0);
-}
-
-int autofs_fill_super(struct super_block *s, void *data, int silent)
-{
- struct inode * root_inode;
- struct dentry * root;
- struct file * pipe;
- int pipefd;
- struct autofs_sb_info *sbi;
- int minproto, maxproto;
- pid_t pgid;
-
- save_mount_options(s, data);
-
- sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
- if (!sbi)
- goto fail_unlock;
- DPRINTK(("autofs: starting up, sbi = %p\n",sbi));
-
- s->s_fs_info = sbi;
- sbi->magic = AUTOFS_SBI_MAGIC;
- sbi->pipe = NULL;
- sbi->catatonic = 1;
- sbi->exp_timeout = 0;
- autofs_initialize_hash(&sbi->dirhash);
- sbi->queues = NULL;
- memset(sbi->symlink_bitmap, 0, sizeof(long)*AUTOFS_SYMLINK_BITMAP_LEN);
- sbi->next_dir_ino = AUTOFS_FIRST_DIR_INO;
- s->s_blocksize = 1024;
- s->s_blocksize_bits = 10;
- s->s_magic = AUTOFS_SUPER_MAGIC;
- s->s_op = &autofs_sops;
- s->s_time_gran = 1;
- sbi->sb = s;
-
- root_inode = autofs_iget(s, AUTOFS_ROOT_INO);
- if (IS_ERR(root_inode))
- goto fail_free;
- root = d_alloc_root(root_inode);
- pipe = NULL;
-
- if (!root)
- goto fail_iput;
-
- /* Can this call block? - WTF cares? s is locked. */
- if (parse_options(data, &pipefd, &root_inode->i_uid,
- &root_inode->i_gid, &pgid, &minproto,
- &maxproto)) {
- printk("autofs: called with bogus options\n");
- goto fail_dput;
- }
-
- /* Couldn't this be tested earlier? */
- if (minproto > AUTOFS_PROTO_VERSION ||
- maxproto < AUTOFS_PROTO_VERSION) {
- printk("autofs: kernel does not match daemon version\n");
- goto fail_dput;
- }
-
- DPRINTK(("autofs: pipe fd = %d, pgrp = %u\n", pipefd, pgid));
- sbi->oz_pgrp = find_get_pid(pgid);
-
- if (!sbi->oz_pgrp) {
- printk("autofs: could not find process group %d\n", pgid);
- goto fail_dput;
- }
-
- pipe = fget(pipefd);
-
- if (!pipe) {
- printk("autofs: could not open pipe file descriptor\n");
- goto fail_put_pid;
- }
-
- if (!pipe->f_op || !pipe->f_op->write)
- goto fail_fput;
- sbi->pipe = pipe;
- sbi->catatonic = 0;
-
- /*
- * Success! Install the root dentry now to indicate completion.
- */
- s->s_root = root;
- return 0;
-
-fail_fput:
- printk("autofs: pipe file descriptor does not contain proper ops\n");
- fput(pipe);
-fail_put_pid:
- put_pid(sbi->oz_pgrp);
-fail_dput:
- dput(root);
- goto fail_free;
-fail_iput:
- printk("autofs: get root dentry failed\n");
- iput(root_inode);
-fail_free:
- kfree(sbi);
- s->s_fs_info = NULL;
-fail_unlock:
- return -EINVAL;
-}
-
-struct inode *autofs_iget(struct super_block *sb, unsigned long ino)
-{
- unsigned int n;
- struct autofs_sb_info *sbi = autofs_sbi(sb);
- struct inode *inode;
-
- inode = iget_locked(sb, ino);
- if (!inode)
- return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
- return inode;
-
- /* Initialize to the default case (stub directory) */
-
- inode->i_op = &simple_dir_inode_operations;
- inode->i_fop = &simple_dir_operations;
- inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
- inode->i_nlink = 2;
- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-
- if (ino == AUTOFS_ROOT_INO) {
- inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
- inode->i_op = &autofs_root_inode_operations;
- inode->i_fop = &autofs_root_operations;
- goto done;
- }
-
- inode->i_uid = inode->i_sb->s_root->d_inode->i_uid;
- inode->i_gid = inode->i_sb->s_root->d_inode->i_gid;
-
- if (ino >= AUTOFS_FIRST_SYMLINK && ino < AUTOFS_FIRST_DIR_INO) {
- /* Symlink inode - should be in symlink list */
- struct autofs_symlink *sl;
-
- n = ino - AUTOFS_FIRST_SYMLINK;
- if (n >= AUTOFS_MAX_SYMLINKS || !test_bit(n,sbi->symlink_bitmap)) {
- printk("autofs: Looking for bad symlink inode %u\n", (unsigned int) ino);
- goto done;
- }
-
- inode->i_op = &autofs_symlink_inode_operations;
- sl = &sbi->symlink[n];
- inode->i_private = sl;
- inode->i_mode = S_IFLNK | S_IRWXUGO;
- inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = sl->mtime;
- inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
- inode->i_size = sl->len;
- inode->i_nlink = 1;
- }
-
-done:
- unlock_new_inode(inode);
- return inode;
-}
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
deleted file mode 100644
index 0c4ca81..0000000
--- a/fs/autofs/root.c
+++ /dev/null
@@ -1,645 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/autofs/root.c
- *
- * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/stat.h>
-#include <linux/slab.h>
-#include <linux/param.h>
-#include <linux/time.h>
-#include <linux/compat.h>
-#include <linux/smp_lock.h>
-#include "autofs_i.h"
-
-static int autofs_root_readdir(struct file *,void *,filldir_t);
-static struct dentry *autofs_root_lookup(struct inode *,struct dentry *, struct nameidata *);
-static int autofs_root_symlink(struct inode *,struct dentry *,const char *);
-static int autofs_root_unlink(struct inode *,struct dentry *);
-static int autofs_root_rmdir(struct inode *,struct dentry *);
-static int autofs_root_mkdir(struct inode *,struct dentry *,int);
-static long autofs_root_ioctl(struct file *,unsigned int,unsigned long);
-#ifdef CONFIG_COMPAT
-static long autofs_root_compat_ioctl(struct file *,unsigned int,unsigned long);
-#endif
-
-const struct file_operations autofs_root_operations = {
- .llseek = generic_file_llseek,
- .read = generic_read_dir,
- .readdir = autofs_root_readdir,
- .unlocked_ioctl = autofs_root_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = autofs_root_compat_ioctl,
-#endif
-};
-
-const struct inode_operations autofs_root_inode_operations = {
- .lookup = autofs_root_lookup,
- .unlink = autofs_root_unlink,
- .symlink = autofs_root_symlink,
- .mkdir = autofs_root_mkdir,
- .rmdir = autofs_root_rmdir,
-};
-
-static int autofs_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
-{
- struct autofs_dir_ent *ent = NULL;
- struct autofs_dirhash *dirhash;
- struct autofs_sb_info *sbi;
- struct inode * inode = filp->f_path.dentry->d_inode;
- off_t onr, nr;
-
- lock_kernel();
-
- sbi = autofs_sbi(inode->i_sb);
- dirhash = &sbi->dirhash;
- nr = filp->f_pos;
-
- switch(nr)
- {
- case 0:
- if (filldir(dirent, ".", 1, nr, inode->i_ino, DT_DIR) < 0)
- goto out;
- filp->f_pos = ++nr;
- /* fall through */
- case 1:
- if (filldir(dirent, "..", 2, nr, inode->i_ino, DT_DIR) < 0)
- goto out;
- filp->f_pos = ++nr;
- /* fall through */
- default:
- while (onr = nr, ent = autofs_hash_enum(dirhash,&nr,ent)) {
- if (!ent->dentry || d_mountpoint(ent->dentry)) {
- if (filldir(dirent,ent->name,ent->len,onr,ent->ino,DT_UNKNOWN) < 0)
- goto out;
- filp->f_pos = nr;
- }
- }
- break;
- }
-
-out:
- unlock_kernel();
- return 0;
-}
-
-static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, struct autofs_sb_info *sbi)
-{
- struct inode * inode;
- struct autofs_dir_ent *ent;
- int status = 0;
-
- if (!(ent = autofs_hash_lookup(&sbi->dirhash, &dentry->d_name))) {
- do {
- if (status && dentry->d_inode) {
- if (status != -ENOENT)
- printk("autofs warning: lookup failure on positive dentry, status = %d, name = %s\n", status, dentry->d_name.name);
- return 0; /* Try to get the kernel to invalidate this dentry */
- }
-
- /* Turn this into a real negative dentry? */
- if (status == -ENOENT) {
- dentry->d_time = jiffies + AUTOFS_NEGATIVE_TIMEOUT;
- dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
- return 1;
- } else if (status) {
- /* Return a negative dentry, but leave it "pending" */
- return 1;
- }
- status = autofs_wait(sbi, &dentry->d_name);
- } while (!(ent = autofs_hash_lookup(&sbi->dirhash, &dentry->d_name)));
- }
-
- /* Abuse this field as a pointer to the directory entry, used to
- find the expire list pointers */
- dentry->d_time = (unsigned long) ent;
-
- if (!dentry->d_inode) {
- inode = autofs_iget(sb, ent->ino);
- if (IS_ERR(inode)) {
- /* Failed, but leave pending for next time */
- return 1;
- }
- dentry->d_inode = inode;
- }
-
- /* If this is a directory that isn't a mount point, bitch at the
- daemon and fix it in user space */
- if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry)) {
- return !autofs_wait(sbi, &dentry->d_name);
- }
-
- /* We don't update the usages for the autofs daemon itself, this
- is necessary for recursive autofs mounts */
- if (!autofs_oz_mode(sbi)) {
- autofs_update_usage(&sbi->dirhash,ent);
- }
-
- dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
- return 1;
-}
-
-
-/*
- * Revalidate is called on every cache lookup. Some of those
- * cache lookups may actually happen while the dentry is not
- * yet completely filled in, and revalidate has to delay such
- * lookups..
- */
-static int autofs_revalidate(struct dentry * dentry, struct nameidata *nd)
-{
- struct inode * dir;
- struct autofs_sb_info *sbi;
- struct autofs_dir_ent *ent;
- int res;
-
- lock_kernel();
- dir = dentry->d_parent->d_inode;
- sbi = autofs_sbi(dir->i_sb);
-
- /* Pending dentry */
- if (dentry->d_flags & DCACHE_AUTOFS_PENDING) {
- if (autofs_oz_mode(sbi))
- res = 1;
- else
- res = try_to_fill_dentry(dentry, dir->i_sb, sbi);
- unlock_kernel();
- return res;
- }
-
- /* Negative dentry.. invalidate if "old" */
- if (!dentry->d_inode) {
- unlock_kernel();
- return (dentry->d_time - jiffies <= AUTOFS_NEGATIVE_TIMEOUT);
- }
-
- /* Check for a non-mountpoint directory */
- if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry)) {
- if (autofs_oz_mode(sbi))
- res = 1;
- else
- res = try_to_fill_dentry(dentry, dir->i_sb, sbi);
- unlock_kernel();
- return res;
- }
-
- /* Update the usage list */
- if (!autofs_oz_mode(sbi)) {
- ent = (struct autofs_dir_ent *) dentry->d_time;
- if (ent)
- autofs_update_usage(&sbi->dirhash,ent);
- }
- unlock_kernel();
- return 1;
-}
-
-static const struct dentry_operations autofs_dentry_operations = {
- .d_revalidate = autofs_revalidate,
-};
-
-static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
-{
- struct autofs_sb_info *sbi;
- int oz_mode;
-
- DPRINTK(("autofs_root_lookup: name = "));
- lock_kernel();
- autofs_say(dentry->d_name.name,dentry->d_name.len);
-
- if (dentry->d_name.len > NAME_MAX) {
- unlock_kernel();
- return ERR_PTR(-ENAMETOOLONG);/* File name too long to exist */
- }
-
- sbi = autofs_sbi(dir->i_sb);
-
- oz_mode = autofs_oz_mode(sbi);
- DPRINTK(("autofs_lookup: pid = %u, pgrp = %u, catatonic = %d, "
- "oz_mode = %d\n", task_pid_nr(current),
- task_pgrp_nr(current), sbi->catatonic,
- oz_mode));
-
- /*
- * Mark the dentry incomplete, but add it. This is needed so
- * that the VFS layer knows about the dentry, and we can count
- * on catching any lookups through the revalidate.
- *
- * Let all the hard work be done by the revalidate function that
- * needs to be able to do this anyway..
- *
- * We need to do this before we release the directory semaphore.
- */
- dentry->d_op = &autofs_dentry_operations;
- dentry->d_flags |= DCACHE_AUTOFS_PENDING;
- d_add(dentry, NULL);
-
- mutex_unlock(&dir->i_mutex);
- autofs_revalidate(dentry, nd);
- mutex_lock(&dir->i_mutex);
-
- /*
- * If we are still pending, check if we had to handle
- * a signal. If so we can force a restart..
- */
- if (dentry->d_flags & DCACHE_AUTOFS_PENDING) {
- /* See if we were interrupted */
- if (signal_pending(current)) {
- sigset_t *sigset = &current->pending.signal;
- if (sigismember (sigset, SIGKILL) ||
- sigismember (sigset, SIGQUIT) ||
- sigismember (sigset, SIGINT)) {
- unlock_kernel();
- return ERR_PTR(-ERESTARTNOINTR);
- }
- }
- }
- unlock_kernel();
-
- /*
- * If this dentry is unhashed, then we shouldn't honour this
- * lookup even if the dentry is positive. Returning ENOENT here
- * doesn't do the right thing for all system calls, but it should
- * be OK for the operations we permit from an autofs.
- */
- if (dentry->d_inode && d_unhashed(dentry))
- return ERR_PTR(-ENOENT);
-
- return NULL;
-}
-
-static int autofs_root_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
-{
- struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
- struct autofs_dirhash *dh = &sbi->dirhash;
- struct autofs_dir_ent *ent;
- unsigned int n;
- int slsize;
- struct autofs_symlink *sl;
- struct inode *inode;
-
- DPRINTK(("autofs_root_symlink: %s <- ", symname));
- autofs_say(dentry->d_name.name,dentry->d_name.len);
-
- lock_kernel();
- if (!autofs_oz_mode(sbi)) {
- unlock_kernel();
- return -EACCES;
- }
-
- if (autofs_hash_lookup(dh, &dentry->d_name)) {
- unlock_kernel();
- return -EEXIST;
- }
-
- n = find_first_zero_bit(sbi->symlink_bitmap,AUTOFS_MAX_SYMLINKS);
- if (n >= AUTOFS_MAX_SYMLINKS) {
- unlock_kernel();
- return -ENOSPC;
- }
-
- set_bit(n,sbi->symlink_bitmap);
- sl = &sbi->symlink[n];
- sl->len = strlen(symname);
- sl->data = kmalloc(slsize = sl->len+1, GFP_KERNEL);
- if (!sl->data) {
- clear_bit(n,sbi->symlink_bitmap);
- unlock_kernel();
- return -ENOSPC;
- }
-
- ent = kmalloc(sizeof(struct autofs_dir_ent), GFP_KERNEL);
- if (!ent) {
- kfree(sl->data);
- clear_bit(n,sbi->symlink_bitmap);
- unlock_kernel();
- return -ENOSPC;
- }
-
- ent->name = kmalloc(dentry->d_name.len+1, GFP_KERNEL);
- if (!ent->name) {
- kfree(sl->data);
- kfree(ent);
- clear_bit(n,sbi->symlink_bitmap);
- unlock_kernel();
- return -ENOSPC;
- }
-
- memcpy(sl->data,symname,slsize);
- sl->mtime = get_seconds();
-
- ent->ino = AUTOFS_FIRST_SYMLINK + n;
- ent->hash = dentry->d_name.hash;
- memcpy(ent->name, dentry->d_name.name, 1+(ent->len = dentry->d_name.len));
- ent->dentry = NULL; /* We don't keep the dentry for symlinks */
-
- autofs_hash_insert(dh,ent);
-
- inode = autofs_iget(dir->i_sb, ent->ino);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
-
- d_instantiate(dentry, inode);
- unlock_kernel();
- return 0;
-}
-
-/*
- * NOTE!
- *
- * Normal filesystems would do a "d_delete()" to tell the VFS dcache
- * that the file no longer exists. However, doing that means that the
- * VFS layer can turn the dentry into a negative dentry, which we
- * obviously do not want (we're dropping the entry not because it
- * doesn't exist, but because it has timed out).
- *
- * Also see autofs_root_rmdir()..
- */
-static int autofs_root_unlink(struct inode *dir, struct dentry *dentry)
-{
- struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
- struct autofs_dirhash *dh = &sbi->dirhash;
- struct autofs_dir_ent *ent;
- unsigned int n;
-
- /* This allows root to remove symlinks */
- lock_kernel();
- if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) {
- unlock_kernel();
- return -EACCES;
- }
-
- ent = autofs_hash_lookup(dh, &dentry->d_name);
- if (!ent) {
- unlock_kernel();
- return -ENOENT;
- }
-
- n = ent->ino - AUTOFS_FIRST_SYMLINK;
- if (n >= AUTOFS_MAX_SYMLINKS) {
- unlock_kernel();
- return -EISDIR; /* It's a directory, dummy */
- }
- if (!test_bit(n,sbi->symlink_bitmap)) {
- unlock_kernel();
- return -EINVAL; /* Nonexistent symlink? Shouldn't happen */
- }
-
- dentry->d_time = (unsigned long)(struct autofs_dirhash *)NULL;
- autofs_hash_delete(ent);
- clear_bit(n,sbi->symlink_bitmap);
- kfree(sbi->symlink[n].data);
- d_drop(dentry);
-
- unlock_kernel();
- return 0;
-}
-
-static int autofs_root_rmdir(struct inode *dir, struct dentry *dentry)
-{
- struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
- struct autofs_dirhash *dh = &sbi->dirhash;
- struct autofs_dir_ent *ent;
-
- lock_kernel();
- if (!autofs_oz_mode(sbi)) {
- unlock_kernel();
- return -EACCES;
- }
-
- ent = autofs_hash_lookup(dh, &dentry->d_name);
- if (!ent) {
- unlock_kernel();
- return -ENOENT;
- }
-
- if ((unsigned int)ent->ino < AUTOFS_FIRST_DIR_INO) {
- unlock_kernel();
- return -ENOTDIR; /* Not a directory */
- }
-
- if (ent->dentry != dentry) {
- printk("autofs_rmdir: odentry != dentry for entry %s\n", dentry->d_name.name);
- }
-
- dentry->d_time = (unsigned long)(struct autofs_dir_ent *)NULL;
- autofs_hash_delete(ent);
- drop_nlink(dir);
- d_drop(dentry);
- unlock_kernel();
-
- return 0;
-}
-
-static int autofs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-{
- struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
- struct autofs_dirhash *dh = &sbi->dirhash;
- struct autofs_dir_ent *ent;
- struct inode *inode;
- ino_t ino;
-
- lock_kernel();
- if (!autofs_oz_mode(sbi)) {
- unlock_kernel();
- return -EACCES;
- }
-
- ent = autofs_hash_lookup(dh, &dentry->d_name);
- if (ent) {
- unlock_kernel();
- return -EEXIST;
- }
-
- if (sbi->next_dir_ino < AUTOFS_FIRST_DIR_INO) {
- printk("autofs: Out of inode numbers -- what the heck did you do??\n");
- unlock_kernel();
- return -ENOSPC;
- }
- ino = sbi->next_dir_ino++;
-
- ent = kmalloc(sizeof(struct autofs_dir_ent), GFP_KERNEL);
- if (!ent) {
- unlock_kernel();
- return -ENOSPC;
- }
-
- ent->name = kmalloc(dentry->d_name.len+1, GFP_KERNEL);
- if (!ent->name) {
- kfree(ent);
- unlock_kernel();
- return -ENOSPC;
- }
-
- ent->hash = dentry->d_name.hash;
- memcpy(ent->name, dentry->d_name.name, 1+(ent->len = dentry->d_name.len));
- ent->ino = ino;
- ent->dentry = dentry;
- autofs_hash_insert(dh,ent);
-
- inc_nlink(dir);
-
- inode = autofs_iget(dir->i_sb, ino);
- if (IS_ERR(inode)) {
- drop_nlink(dir);
- return PTR_ERR(inode);
- }
-
- d_instantiate(dentry, inode);
- unlock_kernel();
-
- return 0;
-}
-
-/* Get/set timeout ioctl() operation */
-#ifdef CONFIG_COMPAT
-static inline int autofs_compat_get_set_timeout(struct autofs_sb_info *sbi,
- unsigned int __user *p)
-{
- unsigned long ntimeout;
-
- if (get_user(ntimeout, p) ||
- put_user(sbi->exp_timeout / HZ, p))
- return -EFAULT;
-
- if (ntimeout > UINT_MAX/HZ)
- sbi->exp_timeout = 0;
- else
- sbi->exp_timeout = ntimeout * HZ;
-
- return 0;
-}
-#endif
-
-static inline int autofs_get_set_timeout(struct autofs_sb_info *sbi,
- unsigned long __user *p)
-{
- unsigned long ntimeout;
-
- if (get_user(ntimeout, p) ||
- put_user(sbi->exp_timeout / HZ, p))
- return -EFAULT;
-
- if (ntimeout > ULONG_MAX/HZ)
- sbi->exp_timeout = 0;
- else
- sbi->exp_timeout = ntimeout * HZ;
-
- return 0;
-}
-
-/* Return protocol version */
-static inline int autofs_get_protover(int __user *p)
-{
- return put_user(AUTOFS_PROTO_VERSION, p);
-}
-
-/* Perform an expiry operation */
-static inline int autofs_expire_run(struct super_block *sb,
- struct autofs_sb_info *sbi,
- struct vfsmount *mnt,
- struct autofs_packet_expire __user *pkt_p)
-{
- struct autofs_dir_ent *ent;
- struct autofs_packet_expire pkt;
-
- memset(&pkt,0,sizeof pkt);
-
- pkt.hdr.proto_version = AUTOFS_PROTO_VERSION;
- pkt.hdr.type = autofs_ptype_expire;
-
- if (!sbi->exp_timeout || !(ent = autofs_expire(sb,sbi,mnt)))
- return -EAGAIN;
-
- pkt.len = ent->len;
- memcpy(pkt.name, ent->name, pkt.len);
- pkt.name[pkt.len] = '\0';
-
- if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)))
- return -EFAULT;
-
- return 0;
-}
-
-/*
- * ioctl()'s on the root directory is the chief method for the daemon to
- * generate kernel reactions
- */
-static int autofs_do_root_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg)
-{
- struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb);
- void __user *argp = (void __user *)arg;
-
- DPRINTK(("autofs_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",cmd,arg,sbi,task_pgrp_nr(current)));
-
- if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) ||
- _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT)
- return -ENOTTY;
-
- if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- switch(cmd) {
- case AUTOFS_IOC_READY: /* Wait queue: go ahead and retry */
- return autofs_wait_release(sbi,(autofs_wqt_t)arg,0);
- case AUTOFS_IOC_FAIL: /* Wait queue: fail with ENOENT */
- return autofs_wait_release(sbi,(autofs_wqt_t)arg,-ENOENT);
- case AUTOFS_IOC_CATATONIC: /* Enter catatonic mode (daemon shutdown) */
- autofs_catatonic_mode(sbi);
- return 0;
- case AUTOFS_IOC_PROTOVER: /* Get protocol version */
- return autofs_get_protover(argp);
-#ifdef CONFIG_COMPAT
- case AUTOFS_IOC_SETTIMEOUT32:
- return autofs_compat_get_set_timeout(sbi, argp);
-#endif
- case AUTOFS_IOC_SETTIMEOUT:
- return autofs_get_set_timeout(sbi, argp);
- case AUTOFS_IOC_EXPIRE:
- return autofs_expire_run(inode->i_sb, sbi, filp->f_path.mnt,
- argp);
- default:
- return -ENOSYS;
- }
-
-}
-
-static long autofs_root_ioctl(struct file *filp,
- unsigned int cmd, unsigned long arg)
-{
- int ret;
-
- lock_kernel();
- ret = autofs_do_root_ioctl(filp->f_path.dentry->d_inode,
- filp, cmd, arg);
- unlock_kernel();
-
- return ret;
-}
-
-#ifdef CONFIG_COMPAT
-static long autofs_root_compat_ioctl(struct file *filp,
- unsigned int cmd, unsigned long arg)
-{
- struct inode *inode = filp->f_path.dentry->d_inode;
- int ret;
-
- lock_kernel();
- if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
- ret = autofs_do_root_ioctl(inode, filp, cmd, arg);
- else
- ret = autofs_do_root_ioctl(inode, filp, cmd,
- (unsigned long)compat_ptr(arg));
- unlock_kernel();
-
- return ret;
-}
-#endif
diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c
deleted file mode 100644
index 7ce9cb2..0000000
--- a/fs/autofs/symlink.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/autofs/symlink.c
- *
- * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include "autofs_i.h"
-
-/* Nothing to release.. */
-static void *autofs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- char *s=((struct autofs_symlink *)dentry->d_inode->i_private)->data;
- nd_set_link(nd, s);
- return NULL;
-}
-
-const struct inode_operations autofs_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = autofs_follow_link
-};
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
deleted file mode 100644
index be46805..0000000
--- a/fs/autofs/waitq.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/autofs/waitq.c
- *
- * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include <linux/slab.h>
-#include <linux/time.h>
-#include <linux/signal.h>
-#include <linux/file.h>
-#include "autofs_i.h"
-
-/* We make this a static variable rather than a part of the superblock; it
- is better if we don't reassign numbers easily even across filesystems */
-static autofs_wqt_t autofs_next_wait_queue = 1;
-
-/* These are the signals we allow interrupting a pending mount */
-#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) | sigmask(SIGQUIT))
-
-void autofs_catatonic_mode(struct autofs_sb_info *sbi)
-{
- struct autofs_wait_queue *wq, *nwq;
-
- DPRINTK(("autofs: entering catatonic mode\n"));
-
- sbi->catatonic = 1;
- wq = sbi->queues;
- sbi->queues = NULL; /* Erase all wait queues */
- while ( wq ) {
- nwq = wq->next;
- wq->status = -ENOENT; /* Magic is gone - report failure */
- kfree(wq->name);
- wq->name = NULL;
- wake_up(&wq->queue);
- wq = nwq;
- }
- fput(sbi->pipe); /* Close the pipe */
- sbi->pipe = NULL;
- autofs_hash_dputall(&sbi->dirhash); /* Remove all dentry pointers */
-}
-
-static int autofs_write(struct file *file, const void *addr, int bytes)
-{
- unsigned long sigpipe, flags;
- mm_segment_t fs;
- const char *data = (const char *)addr;
- ssize_t wr = 0;
-
- /** WARNING: this is not safe for writing more than PIPE_BUF bytes! **/
-
- sigpipe = sigismember(&current->pending.signal, SIGPIPE);
-
- /* Save pointer to user space and point back to kernel space */
- fs = get_fs();
- set_fs(KERNEL_DS);
-
- while (bytes &&
- (wr = file->f_op->write(file,data,bytes,&file->f_pos)) > 0) {
- data += wr;
- bytes -= wr;
- }
-
- set_fs(fs);
-
- /* Keep the currently executing process from receiving a
- SIGPIPE unless it was already supposed to get one */
- if (wr == -EPIPE && !sigpipe) {
- spin_lock_irqsave(&current->sighand->siglock, flags);
- sigdelset(&current->pending.signal, SIGPIPE);
- recalc_sigpending();
- spin_unlock_irqrestore(&current->sighand->siglock, flags);
- }
-
- return (bytes > 0);
-}
-
-static void autofs_notify_daemon(struct autofs_sb_info *sbi, struct autofs_wait_queue *wq)
-{
- struct autofs_packet_missing pkt;
-
- DPRINTK(("autofs_wait: wait id = 0x%08lx, name = ", wq->wait_queue_token));
- autofs_say(wq->name,wq->len);
-
- memset(&pkt,0,sizeof pkt); /* For security reasons */
-
- pkt.hdr.proto_version = AUTOFS_PROTO_VERSION;
- pkt.hdr.type = autofs_ptype_missing;
- pkt.wait_queue_token = wq->wait_queue_token;
- pkt.len = wq->len;
- memcpy(pkt.name, wq->name, pkt.len);
- pkt.name[pkt.len] = '\0';
-
- if ( autofs_write(sbi->pipe,&pkt,sizeof(struct autofs_packet_missing)) )
- autofs_catatonic_mode(sbi);
-}
-
-int autofs_wait(struct autofs_sb_info *sbi, struct qstr *name)
-{
- struct autofs_wait_queue *wq;
- int status;
-
- /* In catatonic mode, we don't wait for nobody */
- if ( sbi->catatonic )
- return -ENOENT;
-
- /* We shouldn't be able to get here, but just in case */
- if ( name->len > NAME_MAX )
- return -ENOENT;
-
- for ( wq = sbi->queues ; wq ; wq = wq->next ) {
- if ( wq->hash == name->hash &&
- wq->len == name->len &&
- wq->name && !memcmp(wq->name,name->name,name->len) )
- break;
- }
-
- if ( !wq ) {
- /* Create a new wait queue */
- wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
- if ( !wq )
- return -ENOMEM;
-
- wq->name = kmalloc(name->len,GFP_KERNEL);
- if ( !wq->name ) {
- kfree(wq);
- return -ENOMEM;
- }
- wq->wait_queue_token = autofs_next_wait_queue++;
- init_waitqueue_head(&wq->queue);
- wq->hash = name->hash;
- wq->len = name->len;
- wq->status = -EINTR; /* Status return if interrupted */
- memcpy(wq->name, name->name, name->len);
- wq->next = sbi->queues;
- sbi->queues = wq;
-
- /* autofs_notify_daemon() may block */
- wq->wait_ctr = 2;
- autofs_notify_daemon(sbi,wq);
- } else
- wq->wait_ctr++;
-
- /* wq->name is NULL if and only if the lock is already released */
-
- if ( sbi->catatonic ) {
- /* We might have slept, so check again for catatonic mode */
- wq->status = -ENOENT;
- kfree(wq->name);
- wq->name = NULL;
- }
-
- if ( wq->name ) {
- /* Block all but "shutdown" signals while waiting */
- sigset_t sigmask;
-
- siginitsetinv(&sigmask, SHUTDOWN_SIGS);
- sigprocmask(SIG_BLOCK, &sigmask, &sigmask);
-
- interruptible_sleep_on(&wq->queue);
-
- sigprocmask(SIG_SETMASK, &sigmask, NULL);
- } else {
- DPRINTK(("autofs_wait: skipped sleeping\n"));
- }
-
- status = wq->status;
-
- if ( ! --wq->wait_ctr ) /* Are we the last process to need status? */
- kfree(wq);
-
- return status;
-}
-
-
-int autofs_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_token, int status)
-{
- struct autofs_wait_queue *wq, **wql;
-
- for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) {
- if ( wq->wait_queue_token == wait_queue_token )
- break;
- }
- if ( !wq )
- return -EINVAL;
-
- *wql = wq->next; /* Unlink from chain */
- kfree(wq->name);
- wq->name = NULL; /* Do not wait on this queue */
-
- wq->status = status;
-
- if ( ! --wq->wait_ctr ) /* Is anyone still waiting for this guy? */
- kfree(wq);
- else
- wake_up(&wq->queue);
-
- return 0;
-}
-
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 3d283ab..0fffe1c 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -16,6 +16,7 @@
#include <linux/auto_fs4.h>
#include <linux/auto_dev-ioctl.h>
#include <linux/mutex.h>
+#include <linux/spinlock.h>
#include <linux/list.h>
/* This is the range of ioctl() numbers we claim as ours */
@@ -60,6 +61,8 @@ do { \
current->pid, __func__, ##args); \
} while (0)
+extern spinlock_t autofs4_lock;
+
/* Unified info structure. This is pointed to by both the dentry and
inode structures. Each file in the filesystem has an instance of this
structure. It holds a reference to the dentry, so dentries are never
@@ -254,17 +257,15 @@ static inline int simple_positive(struct dentry *dentry)
return dentry->d_inode && !d_unhashed(dentry);
}
-static inline int __simple_empty(struct dentry *dentry)
+static inline void __autofs4_add_expiring(struct dentry *dentry)
{
- struct dentry *child;
- int ret = 0;
-
- list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
- if (simple_positive(child))
- goto out;
- ret = 1;
-out:
- return ret;
+ struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+ struct autofs_info *ino = autofs4_dentry_ino(dentry);
+ if (ino) {
+ if (list_empty(&ino->expiring))
+ list_add(&ino->expiring, &sbi->expiring_list);
+ }
+ return;
}
static inline void autofs4_add_expiring(struct dentry *dentry)
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index a796c94..cc1d013 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -91,24 +91,64 @@ done:
}
/*
- * Calculate next entry in top down tree traversal.
- * From next_mnt in namespace.c - elegant.
+ * Calculate and dget next entry in top down tree traversal.
*/
-static struct dentry *next_dentry(struct dentry *p, struct dentry *root)
+static struct dentry *get_next_positive_dentry(struct dentry *prev,
+ struct dentry *root)
{
- struct list_head *next = p->d_subdirs.next;
+ struct list_head *next;
+ struct dentry *p, *ret;
+
+ if (prev == NULL)
+ return dget(prev);
+ spin_lock(&autofs4_lock);
+relock:
+ p = prev;
+ spin_lock(&p->d_lock);
+again:
+ next = p->d_subdirs.next;
if (next == &p->d_subdirs) {
while (1) {
- if (p == root)
+ struct dentry *parent;
+
+ if (p == root) {
+ spin_unlock(&p->d_lock);
+ spin_unlock(&autofs4_lock);
+ dput(prev);
return NULL;
+ }
+
+ parent = p->d_parent;
+ if (!spin_trylock(&parent->d_lock)) {
+ spin_unlock(&p->d_lock);
+ cpu_relax();
+ goto relock;
+ }
+ spin_unlock(&p->d_lock);
next = p->d_u.d_child.next;
- if (next != &p->d_parent->d_subdirs)
+ p = parent;
+ if (next != &parent->d_subdirs)
break;
- p = p->d_parent;
}
}
- return list_entry(next, struct dentry, d_u.d_child);
+ ret = list_entry(next, struct dentry, d_u.d_child);
+
+ spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED);
+ /* Negative dentry - try next */
+ if (!simple_positive(ret)) {
+ spin_unlock(&ret->d_lock);
+ p = ret;
+ goto again;
+ }
+ dget_dlock(ret);
+ spin_unlock(&ret->d_lock);
+ spin_unlock(&p->d_lock);
+ spin_unlock(&autofs4_lock);
+
+ dput(prev);
+
+ return ret;
}
/*
@@ -158,18 +198,11 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
if (!simple_positive(top))
return 1;
- spin_lock(&dcache_lock);
- for (p = top; p; p = next_dentry(p, top)) {
- /* Negative dentry - give up */
- if (!simple_positive(p))
- continue;
-
+ p = NULL;
+ while ((p = get_next_positive_dentry(p, top))) {
DPRINTK("dentry %p %.*s",
p, (int) p->d_name.len, p->d_name.name);
- p = dget(p);
- spin_unlock(&dcache_lock);
-
/*
* Is someone visiting anywhere in the subtree ?
* If there's no mount we need to check the usage
@@ -198,16 +231,13 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
else
ino_count++;
- if (atomic_read(&p->d_count) > ino_count) {
+ if (p->d_count > ino_count) {
top_ino->last_used = jiffies;
dput(p);
return 1;
}
}
- dput(p);
- spin_lock(&dcache_lock);
}
- spin_unlock(&dcache_lock);
/* Timeout of a tree mount is ultimately determined by its top dentry */
if (!autofs4_can_expire(top, timeout, do_now))
@@ -226,32 +256,21 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
DPRINTK("parent %p %.*s",
parent, (int)parent->d_name.len, parent->d_name.name);
- spin_lock(&dcache_lock);
- for (p = parent; p; p = next_dentry(p, parent)) {
- /* Negative dentry - give up */
- if (!simple_positive(p))
- continue;
-
+ p = NULL;
+ while ((p = get_next_positive_dentry(p, parent))) {
DPRINTK("dentry %p %.*s",
p, (int) p->d_name.len, p->d_name.name);
- p = dget(p);
- spin_unlock(&dcache_lock);
-
if (d_mountpoint(p)) {
/* Can we umount this guy */
if (autofs4_mount_busy(mnt, p))
- goto cont;
+ continue;
/* Can we expire this guy */
if (autofs4_can_expire(p, timeout, do_now))
return p;
}
-cont:
- dput(p);
- spin_lock(&dcache_lock);
}
- spin_unlock(&dcache_lock);
return NULL;
}
@@ -276,7 +295,9 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
struct autofs_info *ino = autofs4_dentry_ino(root);
if (d_mountpoint(root)) {
ino->flags |= AUTOFS_INF_MOUNTPOINT;
- root->d_mounted--;
+ spin_lock(&root->d_lock);
+ root->d_flags &= ~DCACHE_MOUNTED;
+ spin_unlock(&root->d_lock);
}
ino->flags |= AUTOFS_INF_EXPIRING;
init_completion(&ino->expire_complete);
@@ -302,8 +323,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
{
unsigned long timeout;
struct dentry *root = sb->s_root;
+ struct dentry *dentry;
struct dentry *expired = NULL;
- struct list_head *next;
int do_now = how & AUTOFS_EXP_IMMEDIATE;
int exp_leaves = how & AUTOFS_EXP_LEAVES;
struct autofs_info *ino;
@@ -315,23 +336,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
now = jiffies;
timeout = sbi->exp_timeout;
- spin_lock(&dcache_lock);
- next = root->d_subdirs.next;
-
- /* On exit from the loop expire is set to a dgot dentry
- * to expire or it's NULL */
- while ( next != &root->d_subdirs ) {
- struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
-
- /* Negative dentry - give up */
- if (!simple_positive(dentry)) {
- next = next->next;
- continue;
- }
-
- dentry = dget(dentry);
- spin_unlock(&dcache_lock);
-
+ dentry = NULL;
+ while ((dentry = get_next_positive_dentry(dentry, root))) {
spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(dentry);
@@ -347,7 +353,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
/* Path walk currently on this dentry? */
ino_count = atomic_read(&ino->count) + 2;
- if (atomic_read(&dentry->d_count) > ino_count)
+ if (dentry->d_count > ino_count)
goto next;
/* Can we umount this guy */
@@ -369,7 +375,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
if (!exp_leaves) {
/* Path walk currently on this dentry? */
ino_count = atomic_read(&ino->count) + 1;
- if (atomic_read(&dentry->d_count) > ino_count)
+ if (dentry->d_count > ino_count)
goto next;
if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
@@ -383,7 +389,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
} else {
/* Path walk currently on this dentry? */
ino_count = atomic_read(&ino->count) + 1;
- if (atomic_read(&dentry->d_count) > ino_count)
+ if (dentry->d_count > ino_count)
goto next;
expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
@@ -394,11 +400,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
}
next:
spin_unlock(&sbi->fs_lock);
- dput(dentry);
- spin_lock(&dcache_lock);
- next = next->next;
}
- spin_unlock(&dcache_lock);
return NULL;
found:
@@ -408,9 +410,13 @@ found:
ino->flags |= AUTOFS_INF_EXPIRING;
init_completion(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
- spin_lock(&dcache_lock);
+ spin_lock(&autofs4_lock);
+ spin_lock(&expired->d_parent->d_lock);
+ spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
- spin_unlock(&dcache_lock);
+ spin_unlock(&expired->d_lock);
+ spin_unlock(&expired->d_parent->d_lock);
+ spin_unlock(&autofs4_lock);
return expired;
}
@@ -499,7 +505,14 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
spin_lock(&sbi->fs_lock);
if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
- sb->s_root->d_mounted++;
+ spin_lock(&sb->s_root->d_lock);
+ /*
+ * If we haven't been expired away, then reset
+ * mounted status.
+ */
+ if (mnt->mnt_parent != mnt)
+ sb->s_root->d_flags |= DCACHE_MOUNTED;
+ spin_unlock(&sb->s_root->d_lock);
ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
}
ino->flags &= ~AUTOFS_INF_EXPIRING;
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index 9722e4b..c038727 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -14,16 +14,16 @@
#include <linux/init.h>
#include "autofs_i.h"
-static int autofs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *autofs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_nodev(fs_type, flags, data, autofs4_fill_super, mnt);
+ return mount_nodev(fs_type, flags, data, autofs4_fill_super);
}
static struct file_system_type autofs_fs_type = {
.owner = THIS_MODULE,
.name = "autofs",
- .get_sb = autofs_get_sb,
+ .mount = autofs_mount,
.kill_sb = autofs4_kill_sb,
};
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index ac87e49..a7bdb9d 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -309,7 +309,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
goto fail_iput;
pipe = NULL;
- root->d_op = &autofs4_sb_dentry_operations;
+ d_set_d_op(root, &autofs4_sb_dentry_operations);
root->d_fsdata = ino;
/* Can this call block? */
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index d5c1401..651e4ef 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -23,6 +23,8 @@
#include "autofs_i.h"
+DEFINE_SPINLOCK(autofs4_lock);
+
static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
static int autofs4_dir_unlink(struct inode *,struct dentry *);
static int autofs4_dir_rmdir(struct inode *,struct dentry *);
@@ -142,12 +144,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
* autofs file system so just let the libfs routines handle
* it.
*/
- spin_lock(&dcache_lock);
+ spin_lock(&autofs4_lock);
+ spin_lock(&dentry->d_lock);
if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&autofs4_lock);
return -ENOENT;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&autofs4_lock);
out:
return dcache_dir_open(inode, file);
@@ -252,9 +257,11 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
/* We trigger a mount for almost all flags */
lookup_type = autofs4_need_mount(nd->flags);
spin_lock(&sbi->fs_lock);
- spin_lock(&dcache_lock);
+ spin_lock(&autofs4_lock);
+ spin_lock(&dentry->d_lock);
if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&autofs4_lock);
spin_unlock(&sbi->fs_lock);
goto follow;
}
@@ -266,7 +273,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
*/
if (ino->flags & AUTOFS_INF_PENDING ||
(!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&autofs4_lock);
spin_unlock(&sbi->fs_lock);
status = try_to_fill_dentry(dentry, nd->flags);
@@ -275,7 +283,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
goto follow;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&autofs4_lock);
spin_unlock(&sbi->fs_lock);
follow:
/*
@@ -306,12 +315,19 @@ out_error:
*/
static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- struct inode *dir = dentry->d_parent->d_inode;
- struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
- int oz_mode = autofs4_oz_mode(sbi);
+ struct inode *dir;
+ struct autofs_sb_info *sbi;
+ int oz_mode;
int flags = nd ? nd->flags : 0;
int status = 1;
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ dir = dentry->d_parent->d_inode;
+ sbi = autofs4_sbi(dir->i_sb);
+ oz_mode = autofs4_oz_mode(sbi);
+
/* Pending dentry */
spin_lock(&sbi->fs_lock);
if (autofs4_ispending(dentry)) {
@@ -346,12 +362,14 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
return 0;
/* Check for a non-mountpoint directory with no contents */
- spin_lock(&dcache_lock);
+ spin_lock(&autofs4_lock);
+ spin_lock(&dentry->d_lock);
if (S_ISDIR(dentry->d_inode->i_mode) &&
!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
DPRINTK("dentry=%p %.*s, emptydir",
dentry, dentry->d_name.len, dentry->d_name.name);
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&autofs4_lock);
/* The daemon never causes a mount to trigger */
if (oz_mode)
@@ -367,7 +385,8 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
return status;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&autofs4_lock);
return 1;
}
@@ -422,7 +441,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
const unsigned char *str = name->name;
struct list_head *p, *head;
- spin_lock(&dcache_lock);
+ spin_lock(&autofs4_lock);
spin_lock(&sbi->lookup_lock);
head = &sbi->active_list;
list_for_each(p, head) {
@@ -436,7 +455,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
spin_lock(&active->d_lock);
/* Already gone? */
- if (atomic_read(&active->d_count) == 0)
+ if (active->d_count == 0)
goto next;
qstr = &active->d_name;
@@ -452,17 +471,17 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
goto next;
if (d_unhashed(active)) {
- dget(active);
+ dget_dlock(active);
spin_unlock(&active->d_lock);
spin_unlock(&sbi->lookup_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&autofs4_lock);
return active;
}
next:
spin_unlock(&active->d_lock);
}
spin_unlock(&sbi->lookup_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&autofs4_lock);
return NULL;
}
@@ -477,7 +496,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
const unsigned char *str = name->name;
struct list_head *p, *head;
- spin_lock(&dcache_lock);
+ spin_lock(&autofs4_lock);
spin_lock(&sbi->lookup_lock);
head = &sbi->expiring_list;
list_for_each(p, head) {
@@ -507,17 +526,17 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
goto next;
if (d_unhashed(expiring)) {
- dget(expiring);
+ dget_dlock(expiring);
spin_unlock(&expiring->d_lock);
spin_unlock(&sbi->lookup_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&autofs4_lock);
return expiring;
}
next:
spin_unlock(&expiring->d_lock);
}
spin_unlock(&sbi->lookup_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&autofs4_lock);
return NULL;
}
@@ -559,7 +578,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
* we check for the hashed dentry and return the newly
* hashed dentry.
*/
- dentry->d_op = &autofs4_root_dentry_operations;
+ d_set_d_op(dentry, &autofs4_root_dentry_operations);
/*
* And we need to ensure that the same dentry is used for
@@ -698,9 +717,9 @@ static int autofs4_dir_symlink(struct inode *dir,
d_add(dentry, inode);
if (dir == dir->i_sb->s_root->d_inode)
- dentry->d_op = &autofs4_root_dentry_operations;
+ d_set_d_op(dentry, &autofs4_root_dentry_operations);
else
- dentry->d_op = &autofs4_dentry_operations;
+ d_set_d_op(dentry, &autofs4_dentry_operations);
dentry->d_fsdata = ino;
ino->dentry = dget(dentry);
@@ -753,12 +772,12 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
dir->i_mtime = CURRENT_TIME;
- spin_lock(&dcache_lock);
+ spin_lock(&autofs4_lock);
autofs4_add_expiring(dentry);
spin_lock(&dentry->d_lock);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&autofs4_lock);
return 0;
}
@@ -775,16 +794,20 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
if (!autofs4_oz_mode(sbi))
return -EACCES;
- spin_lock(&dcache_lock);
+ spin_lock(&autofs4_lock);
+ spin_lock(&sbi->lookup_lock);
+ spin_lock(&dentry->d_lock);
if (!list_empty(&dentry->d_subdirs)) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&sbi->lookup_lock);
+ spin_unlock(&autofs4_lock);
return -ENOTEMPTY;
}
- autofs4_add_expiring(dentry);
- spin_lock(&dentry->d_lock);
+ __autofs4_add_expiring(dentry);
+ spin_unlock(&sbi->lookup_lock);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&autofs4_lock);
if (atomic_dec_and_test(&ino->count)) {
p_ino = autofs4_dentry_ino(dentry->d_parent);
@@ -829,9 +852,9 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
d_add(dentry, inode);
if (dir == dir->i_sb->s_root->d_inode)
- dentry->d_op = &autofs4_root_dentry_operations;
+ d_set_d_op(dentry, &autofs4_root_dentry_operations);
else
- dentry->d_op = &autofs4_dentry_operations;
+ d_set_d_op(dentry, &autofs4_dentry_operations);
dentry->d_fsdata = ino;
ino->dentry = dget(dentry);
@@ -980,19 +1003,11 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp,
}
}
-static DEFINE_MUTEX(autofs4_ioctl_mutex);
-
static long autofs4_root_ioctl(struct file *filp,
unsigned int cmd, unsigned long arg)
{
- long ret;
struct inode *inode = filp->f_dentry->d_inode;
-
- mutex_lock(&autofs4_ioctl_mutex);
- ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
- mutex_unlock(&autofs4_ioctl_mutex);
-
- return ret;
+ return autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
}
#ifdef CONFIG_COMPAT
@@ -1002,13 +1017,11 @@ static long autofs4_root_compat_ioctl(struct file *filp,
struct inode *inode = filp->f_path.dentry->d_inode;
int ret;
- mutex_lock(&autofs4_ioctl_mutex);
if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
else
ret = autofs4_root_ioctl_unlocked(inode, filp, cmd,
(unsigned long)compat_ptr(arg));
- mutex_unlock(&autofs4_ioctl_mutex);
return ret;
}
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 2341375..c5f8459 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -186,16 +186,26 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
{
struct dentry *root = sbi->sb->s_root;
struct dentry *tmp;
- char *buf = *name;
+ char *buf;
char *p;
- int len = 0;
+ int len;
+ unsigned seq;
- spin_lock(&dcache_lock);
+rename_retry:
+ buf = *name;
+ len = 0;
+
+ seq = read_seqbegin(&rename_lock);
+ rcu_read_lock();
+ spin_lock(&autofs4_lock);
for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
len += tmp->d_name.len + 1;
if (!len || --len > NAME_MAX) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&autofs4_lock);
+ rcu_read_unlock();
+ if (read_seqretry(&rename_lock, seq))
+ goto rename_retry;
return 0;
}
@@ -208,7 +218,10 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
p -= tmp->d_name.len;
strncpy(p, tmp->d_name.name, tmp->d_name.len);
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&autofs4_lock);
+ rcu_read_unlock();
+ if (read_seqretry(&rename_lock, seq))
+ goto rename_retry;
return len;
}
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index f024d8a..9ad2369 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -229,8 +229,11 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
return -EIO;
}
-static int bad_inode_permission(struct inode *inode, int mask)
+static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags)
{
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
return -EIO;
}
diff --git a/fs/befs/endian.h b/fs/befs/endian.h
index 6cb84d8..2722387 100644
--- a/fs/befs/endian.h
+++ b/fs/befs/endian.h
@@ -102,22 +102,22 @@ cpu_to_fsrun(const struct super_block *sb, befs_block_run n)
}
static inline befs_data_stream
-fsds_to_cpu(const struct super_block *sb, befs_disk_data_stream n)
+fsds_to_cpu(const struct super_block *sb, const befs_disk_data_stream *n)
{
befs_data_stream data;
int i;
for (i = 0; i < BEFS_NUM_DIRECT_BLOCKS; ++i)
- data.direct[i] = fsrun_to_cpu(sb, n.direct[i]);
+ data.direct[i] = fsrun_to_cpu(sb, n->direct[i]);
- data.max_direct_range = fs64_to_cpu(sb, n.max_direct_range);
- data.indirect = fsrun_to_cpu(sb, n.indirect);
- data.max_indirect_range = fs64_to_cpu(sb, n.max_indirect_range);
- data.double_indirect = fsrun_to_cpu(sb, n.double_indirect);
+ data.max_direct_range = fs64_to_cpu(sb, n->max_direct_range);
+ data.indirect = fsrun_to_cpu(sb, n->indirect);
+ data.max_indirect_range = fs64_to_cpu(sb, n->max_indirect_range);
+ data.double_indirect = fsrun_to_cpu(sb, n->double_indirect);
data.max_double_indirect_range = fs64_to_cpu(sb,
- n.
+ n->
max_double_indirect_range);
- data.size = fs64_to_cpu(sb, n.size);
+ data.size = fs64_to_cpu(sb, n->size);
return data;
}
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index dc39d28..b1d0c79 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -284,12 +284,18 @@ befs_alloc_inode(struct super_block *sb)
return &bi->vfs_inode;
}
-static void
-befs_destroy_inode(struct inode *inode)
+static void befs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
}
+static void befs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, befs_i_callback);
+}
+
static void init_once(void *foo)
{
struct befs_inode_info *bi = (struct befs_inode_info *) foo;
@@ -384,7 +390,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
int num_blks;
befs_ino->i_data.ds =
- fsds_to_cpu(sb, raw_inode->data.datastream);
+ fsds_to_cpu(sb, &raw_inode->data.datastream);
num_blks = befs_count_blocks(sb, &befs_ino->i_data.ds);
inode->i_blocks =
@@ -913,18 +919,17 @@ befs_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
-static int
-befs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+static struct dentry *
+befs_mount(struct file_system_type *fs_type, int flags, const char *dev_name,
+ void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, befs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, befs_fill_super);
}
static struct file_system_type befs_fs_type = {
.owner = THIS_MODULE,
.name = "befs",
- .get_sb = befs_get_sb,
+ .mount = befs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 883e77a..a8e37f8 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -248,11 +248,18 @@ static struct inode *bfs_alloc_inode(struct super_block *sb)
return &bi->vfs_inode;
}
-static void bfs_destroy_inode(struct inode *inode)
+static void bfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
}
+static void bfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, bfs_i_callback);
+}
+
static void init_once(void *foo)
{
struct bfs_inode_info *bi = foo;
@@ -450,16 +457,16 @@ out:
return ret;
}
-static int bfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *bfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, bfs_fill_super, mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, bfs_fill_super);
}
static struct file_system_type bfs_fs_type = {
.owner = THIS_MODULE,
.name = "bfs",
- .get_sb = bfs_get_sb,
+ .mount = bfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6884e19..d5b640b 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -66,12 +66,11 @@ static int elf_core_dump(struct coredump_params *cprm);
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
static struct linux_binfmt elf_format = {
- .module = THIS_MODULE,
- .load_binary = load_elf_binary,
- .load_shlib = load_elf_library,
- .core_dump = elf_core_dump,
- .min_coredump = ELF_EXEC_PAGESIZE,
- .hasvdso = 1
+ .module = THIS_MODULE,
+ .load_binary = load_elf_binary,
+ .load_shlib = load_elf_library,
+ .core_dump = elf_core_dump,
+ .min_coredump = ELF_EXEC_PAGESIZE,
};
#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
@@ -316,8 +315,6 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
return 0;
}
-#ifndef elf_map
-
static unsigned long elf_map(struct file *filep, unsigned long addr,
struct elf_phdr *eppnt, int prot, int type,
unsigned long total_size)
@@ -354,8 +351,6 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
return(map_addr);
}
-#endif /* !elf_map */
-
static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
int i, first_idx = -1, last_idx = -1;
@@ -421,7 +416,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
goto out;
retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
- (char *)elf_phdata,size);
+ (char *)elf_phdata, size);
error = -EIO;
if (retval != size) {
if (retval < 0)
@@ -601,7 +596,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
goto out;
if (!elf_check_arch(&loc->elf_ex))
goto out;
- if (!bprm->file->f_op||!bprm->file->f_op->mmap)
+ if (!bprm->file->f_op || !bprm->file->f_op->mmap)
goto out;
/* Now read in all of the header information */
@@ -761,8 +756,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
/* There was a PT_LOAD segment with p_memsz > p_filesz
before this one. Map anonymous pages, if needed,
and clear the area. */
- retval = set_brk (elf_bss + load_bias,
- elf_brk + load_bias);
+ retval = set_brk(elf_bss + load_bias,
+ elf_brk + load_bias);
if (retval) {
send_sig(SIGKILL, current, 0);
goto out_free_dentry;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 29990f0..1befe2e 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -706,10 +706,10 @@ static int bm_fill_super(struct super_block * sb, void * data, int silent)
return err;
}
-static int bm_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *bm_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_single(fs_type, flags, data, bm_fill_super, mnt);
+ return mount_single(fs_type, flags, data, bm_fill_super);
}
static struct linux_binfmt misc_format = {
@@ -720,7 +720,7 @@ static struct linux_binfmt misc_format = {
static struct file_system_type bm_fs_type = {
.owner = THIS_MODULE,
.name = "binfmt_misc",
- .get_sb = bm_get_sb,
+ .mount = bm_mount,
.kill_sb = kill_litter_super,
};
diff --git a/fs/bio.c b/fs/bio.c
index 8abb2df..4bd454f 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -370,6 +370,9 @@ struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
{
struct bio *bio;
+ if (nr_iovecs > UIO_MAXIOV)
+ return NULL;
+
bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec),
gfp_mask);
if (unlikely(!bio))
@@ -697,8 +700,12 @@ static void bio_free_map_data(struct bio_map_data *bmd)
static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
gfp_t gfp_mask)
{
- struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask);
+ struct bio_map_data *bmd;
+ if (iov_count > UIO_MAXIOV)
+ return NULL;
+
+ bmd = kmalloc(sizeof(*bmd), gfp_mask);
if (!bmd)
return NULL;
@@ -827,6 +834,12 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
start = uaddr >> PAGE_SHIFT;
+ /*
+ * Overflow, abort
+ */
+ if (end < start)
+ return ERR_PTR(-EINVAL);
+
nr_pages += end - start;
len += iov[i].iov_len;
}
@@ -955,6 +968,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
unsigned long start = uaddr >> PAGE_SHIFT;
+ /*
+ * Overflow, abort
+ */
+ if (end < start)
+ return ERR_PTR(-EINVAL);
+
nr_pages += end - start;
/*
* buffer must be aligned to at least hardsector size for now
@@ -982,7 +1001,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
unsigned long start = uaddr >> PAGE_SHIFT;
const int local_nr_pages = end - start;
const int page_limit = cur_page + local_nr_pages;
-
+
ret = get_user_pages_fast(uaddr, local_nr_pages,
write_to_vm, &pages[cur_page]);
if (ret < local_nr_pages) {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index dea3b62..771f235 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -11,7 +11,6 @@
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
-#include <linux/smp_lock.h>
#include <linux/device_cgroup.h>
#include <linux/highmem.h>
#include <linux/blkdev.h>
@@ -410,13 +409,20 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void bdev_destroy_inode(struct inode *inode)
+static void bdev_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
struct bdev_inode *bdi = BDEV_I(inode);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(bdev_cachep, bdi);
}
+static void bdev_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, bdev_i_callback);
+}
+
static void init_once(void *foo)
{
struct bdev_inode *ei = (struct bdev_inode *) foo;
@@ -464,15 +470,15 @@ static const struct super_operations bdev_sops = {
.evict_inode = bdev_evict_inode,
};
-static int bd_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *bd_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt);
+ return mount_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576);
}
static struct file_system_type bd_type = {
.name = "bdev",
- .get_sb = bd_get_sb,
+ .mount = bd_mount,
.kill_sb = kill_anon_super,
};
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 2222d16..6ae2c8c 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -185,18 +185,23 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
return ret;
}
-int btrfs_check_acl(struct inode *inode, int mask)
+int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags)
{
- struct posix_acl *acl;
int error = -EAGAIN;
- acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+ if (flags & IPERM_FLAG_RCU) {
+ if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+ error = -ECHILD;
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl) {
- error = posix_acl_permission(inode, acl, mask);
- posix_acl_release(acl);
+ } else {
+ struct posix_acl *acl;
+ acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl) {
+ error = posix_acl_permission(inode, acl, mask);
+ posix_acl_release(acl);
+ }
}
return error;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 396039b..b50bc4b 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -91,23 +91,10 @@ static inline int compressed_bio_size(struct btrfs_root *root,
static struct bio *compressed_bio_alloc(struct block_device *bdev,
u64 first_byte, gfp_t gfp_flags)
{
- struct bio *bio;
int nr_vecs;
nr_vecs = bio_get_nr_vecs(bdev);
- bio = bio_alloc(gfp_flags, nr_vecs);
-
- if (bio == NULL && (current->flags & PF_MEMALLOC)) {
- while (!bio && (nr_vecs /= 2))
- bio = bio_alloc(gfp_flags, nr_vecs);
- }
-
- if (bio) {
- bio->bi_size = 0;
- bio->bi_bdev = bdev;
- bio->bi_sector = first_byte >> 9;
- }
- return bio;
+ return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags);
}
static int check_compressed_csum(struct inode *inode,
@@ -163,7 +150,6 @@ fail:
*/
static void end_compressed_bio_read(struct bio *bio, int err)
{
- struct extent_io_tree *tree;
struct compressed_bio *cb = bio->bi_private;
struct inode *inode;
struct page *page;
@@ -187,7 +173,6 @@ static void end_compressed_bio_read(struct bio *bio, int err)
/* ok, we're the last bio for this extent, lets start
* the decompression.
*/
- tree = &BTRFS_I(inode)->io_tree;
ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
cb->start,
cb->orig_bio->bi_io_vec,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c3df14c..9ac1715 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -200,7 +200,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
struct extent_buffer **cow_ret, u64 new_root_objectid)
{
struct extent_buffer *cow;
- u32 nritems;
int ret = 0;
int level;
struct btrfs_disk_key disk_key;
@@ -210,7 +209,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
WARN_ON(root->ref_cows && trans->transid != root->last_trans);
level = btrfs_header_level(buf);
- nritems = btrfs_header_nritems(buf);
if (level == 0)
btrfs_item_key(buf, &disk_key, 0);
else
@@ -1008,7 +1006,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
int wret;
int pslot;
int orig_slot = path->slots[level];
- int err_on_enospc = 0;
u64 orig_ptr;
if (level == 0)
@@ -1071,8 +1068,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
return 0;
- if (btrfs_header_nritems(mid) < 2)
- err_on_enospc = 1;
+ btrfs_header_nritems(mid);
left = read_node_slot(root, parent, pslot - 1);
if (left) {
@@ -1103,8 +1099,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
wret = push_node_left(trans, root, left, mid, 1);
if (wret < 0)
ret = wret;
- if (btrfs_header_nritems(mid) < 2)
- err_on_enospc = 1;
+ btrfs_header_nritems(mid);
}
/*
@@ -1224,14 +1219,12 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
int wret;
int pslot;
int orig_slot = path->slots[level];
- u64 orig_ptr;
if (level == 0)
return 1;
mid = path->nodes[level];
WARN_ON(btrfs_header_generation(mid) != trans->transid);
- orig_ptr = btrfs_node_blockptr(mid, orig_slot);
if (level < BTRFS_MAX_LEVEL - 1)
parent = path->nodes[level + 1];
@@ -1577,13 +1570,33 @@ read_block_for_search(struct btrfs_trans_handle *trans,
blocksize = btrfs_level_size(root, level - 1);
tmp = btrfs_find_tree_block(root, blocknr, blocksize);
- if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
- /*
- * we found an up to date block without sleeping, return
- * right away
- */
- *eb_ret = tmp;
- return 0;
+ if (tmp) {
+ if (btrfs_buffer_uptodate(tmp, 0)) {
+ if (btrfs_buffer_uptodate(tmp, gen)) {
+ /*
+ * we found an up to date block without
+ * sleeping, return
+ * right away
+ */
+ *eb_ret = tmp;
+ return 0;
+ }
+ /* the pages were up to date, but we failed
+ * the generation number check. Do a full
+ * read for the generation number that is correct.
+ * We must do this without dropping locks so
+ * we can trust our generation number
+ */
+ free_extent_buffer(tmp);
+ tmp = read_tree_block(root, blocknr, blocksize, gen);
+ if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+ *eb_ret = tmp;
+ return 0;
+ }
+ free_extent_buffer(tmp);
+ btrfs_release_path(NULL, p);
+ return -EIO;
+ }
}
/*
@@ -1596,8 +1609,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
btrfs_unlock_up_safe(p, level + 1);
btrfs_set_path_blocking(p);
- if (tmp)
- free_extent_buffer(tmp);
+ free_extent_buffer(tmp);
if (p->reada)
reada_for_search(root, p, level, slot, key->objectid);
@@ -2548,7 +2560,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
{
struct btrfs_disk_key disk_key;
struct extent_buffer *right = path->nodes[0];
- int slot;
int i;
int push_space = 0;
int push_items = 0;
@@ -2560,8 +2571,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
u32 this_item_size;
u32 old_left_item_size;
- slot = path->slots[1];
-
if (empty)
nr = min(right_nritems, max_slot);
else
@@ -3330,7 +3339,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
{
int ret = 0;
int slot;
- int slot_orig;
struct extent_buffer *leaf;
struct btrfs_item *item;
u32 nritems;
@@ -3340,7 +3348,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
unsigned int size_diff;
int i;
- slot_orig = path->slots[0];
leaf = path->nodes[0];
slot = path->slots[0];
@@ -3445,7 +3452,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
{
int ret = 0;
int slot;
- int slot_orig;
struct extent_buffer *leaf;
struct btrfs_item *item;
u32 nritems;
@@ -3454,7 +3460,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
unsigned int old_size;
int i;
- slot_orig = path->slots[0];
leaf = path->nodes[0];
nritems = btrfs_header_nritems(leaf);
@@ -3787,7 +3792,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
struct btrfs_key *cpu_key, u32 *data_size,
int nr)
{
- struct extent_buffer *leaf;
int ret = 0;
int slot;
int i;
@@ -3804,7 +3808,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
if (ret < 0)
goto out;
- leaf = path->nodes[0];
slot = path->slots[0];
BUG_ON(slot < 0);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index eaf286a..a142d20 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -99,6 +99,9 @@ struct btrfs_ordered_sum;
*/
#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
+/* For storing free space cache */
+#define BTRFS_FREE_SPACE_OBJECTID -11ULL
+
/* dummy objectid represents multiple objectids */
#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
@@ -265,6 +268,22 @@ struct btrfs_chunk {
/* additional stripes go here */
} __attribute__ ((__packed__));
+#define BTRFS_FREE_SPACE_EXTENT 1
+#define BTRFS_FREE_SPACE_BITMAP 2
+
+struct btrfs_free_space_entry {
+ __le64 offset;
+ __le64 bytes;
+ u8 type;
+} __attribute__ ((__packed__));
+
+struct btrfs_free_space_header {
+ struct btrfs_disk_key location;
+ __le64 generation;
+ __le64 num_entries;
+ __le64 num_bitmaps;
+} __attribute__ ((__packed__));
+
static inline unsigned long btrfs_chunk_item_size(int num_stripes)
{
BUG_ON(num_stripes == 0);
@@ -365,8 +384,10 @@ struct btrfs_super_block {
char label[BTRFS_LABEL_SIZE];
+ __le64 cache_generation;
+
/* future expansion */
- __le64 reserved[32];
+ __le64 reserved[31];
u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
} __attribute__ ((__packed__));
@@ -375,13 +396,15 @@ struct btrfs_super_block {
* ones specified below then we will fail to mount
*/
#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
-#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (2ULL << 0)
+#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
+#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
-#define BTRFS_FEATURE_INCOMPAT_SUPP \
- (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
- BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)
+#define BTRFS_FEATURE_INCOMPAT_SUPP \
+ (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
+ BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
+ BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
/*
* A leaf is full of items. offset and size tell us where to find
@@ -675,7 +698,8 @@ struct btrfs_block_group_item {
struct btrfs_space_info {
u64 flags;
- u64 total_bytes; /* total bytes in the space */
+ u64 total_bytes; /* total bytes in the space,
+ this doesn't take mirrors into account */
u64 bytes_used; /* total bytes used,
this does't take mirrors into account */
u64 bytes_pinned; /* total bytes pinned, will be freed when the
@@ -687,6 +711,8 @@ struct btrfs_space_info {
u64 bytes_may_use; /* number of bytes that may be used for
delalloc/allocations */
u64 disk_used; /* total bytes used on disk */
+ u64 disk_total; /* total bytes on disk, takes mirrors into
+ account */
int full; /* indicates that we cannot allocate any more
chunks for this space */
@@ -750,6 +776,14 @@ enum btrfs_caching_type {
BTRFS_CACHE_FINISHED = 2,
};
+enum btrfs_disk_cache_state {
+ BTRFS_DC_WRITTEN = 0,
+ BTRFS_DC_ERROR = 1,
+ BTRFS_DC_CLEAR = 2,
+ BTRFS_DC_SETUP = 3,
+ BTRFS_DC_NEED_WRITE = 4,
+};
+
struct btrfs_caching_control {
struct list_head list;
struct mutex mutex;
@@ -763,6 +797,7 @@ struct btrfs_block_group_cache {
struct btrfs_key key;
struct btrfs_block_group_item item;
struct btrfs_fs_info *fs_info;
+ struct inode *inode;
spinlock_t lock;
u64 pinned;
u64 reserved;
@@ -773,8 +808,11 @@ struct btrfs_block_group_cache {
int extents_thresh;
int free_extents;
int total_bitmaps;
- int ro;
- int dirty;
+ unsigned int ro:1;
+ unsigned int dirty:1;
+ unsigned int iref:1;
+
+ int disk_cache_state;
/* cache tracking stuff */
int cached;
@@ -863,6 +901,7 @@ struct btrfs_fs_info {
struct btrfs_transaction *running_transaction;
wait_queue_head_t transaction_throttle;
wait_queue_head_t transaction_wait;
+ wait_queue_head_t transaction_blocked_wait;
wait_queue_head_t async_submit_wait;
struct btrfs_super_block super_copy;
@@ -949,6 +988,7 @@ struct btrfs_fs_info {
struct btrfs_workers endio_meta_workers;
struct btrfs_workers endio_meta_write_workers;
struct btrfs_workers endio_write_workers;
+ struct btrfs_workers endio_freespace_worker;
struct btrfs_workers submit_workers;
/*
* fixup workers take dirty pages that didn't properly go through
@@ -1192,6 +1232,9 @@ struct btrfs_root {
#define BTRFS_MOUNT_NOSSD (1 << 9)
#define BTRFS_MOUNT_DISCARD (1 << 10)
#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11)
+#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
+#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
+#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -1665,6 +1708,27 @@ static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
write_eb_member(eb, item, struct btrfs_dir_item, location, key);
}
+BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header,
+ num_entries, 64);
+BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
+ num_bitmaps, 64);
+BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
+ generation, 64);
+
+static inline void btrfs_free_space_key(struct extent_buffer *eb,
+ struct btrfs_free_space_header *h,
+ struct btrfs_disk_key *key)
+{
+ read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+}
+
+static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
+ struct btrfs_free_space_header *h,
+ struct btrfs_disk_key *key)
+{
+ write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+}
+
/* struct btrfs_disk_key */
BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
objectid, 64);
@@ -1876,6 +1940,8 @@ BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
incompat_flags, 64);
BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
csum_type, 16);
+BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
+ cache_generation, 64);
static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
{
@@ -1988,6 +2054,12 @@ static inline struct dentry *fdentry(struct file *file)
return file->f_path.dentry;
}
+static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
+{
+ return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
+ (space_info->flags & BTRFS_BLOCK_GROUP_DATA));
+}
+
/* extent-tree.c */
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
@@ -2079,7 +2151,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- int num_items, int *retries);
+ int num_items);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@ -2100,7 +2172,7 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
- u64 num_bytes, int *retries);
+ u64 num_bytes);
int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
@@ -2115,6 +2187,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
int btrfs_set_block_group_rw(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
+void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
@@ -2373,7 +2446,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u32 min_type);
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput);
+int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
+ int sync);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
struct extent_state **cached_state);
int btrfs_writepages(struct address_space *mapping,
@@ -2426,6 +2500,10 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root);
int btrfs_prealloc_file_range(struct inode *inode, int mode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint);
+int btrfs_prealloc_file_range_trans(struct inode *inode,
+ struct btrfs_trans_handle *trans, int mode,
+ u64 start, u64 num_bytes, u64 min_size,
+ loff_t actual_len, u64 *alloc_hint);
extern const struct dentry_operations btrfs_dentry_operations;
/* ioctl.c */
@@ -2466,7 +2544,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait);
/* acl.c */
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
-int btrfs_check_acl(struct inode *inode, int mask);
+int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags);
#else
#define btrfs_check_acl NULL
#endif
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index e9103b3..f0cad5a 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -427,5 +427,5 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
ret = btrfs_truncate_item(trans, root, path,
item_len - sub_item_len, 1);
}
- return 0;
+ return ret;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5e789f4..51d2e4d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -28,6 +28,7 @@
#include <linux/freezer.h>
#include <linux/crc32c.h>
#include <linux/slab.h>
+#include <linux/migrate.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
@@ -338,7 +339,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
struct extent_io_tree *tree;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 found_start;
- int found_level;
unsigned long len;
struct extent_buffer *eb;
int ret;
@@ -356,6 +356,8 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
btrfs_header_generation(eb));
BUG_ON(ret);
+ WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN));
+
found_start = btrfs_header_bytenr(eb);
if (found_start != start) {
WARN_ON(1);
@@ -369,8 +371,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
WARN_ON(1);
goto err;
}
- found_level = btrfs_header_level(eb);
-
csum_tree_block(root, eb, 0);
err:
free_extent_buffer(eb);
@@ -481,9 +481,12 @@ static void end_workqueue_bio(struct bio *bio, int err)
end_io_wq->work.flags = 0;
if (bio->bi_rw & REQ_WRITE) {
- if (end_io_wq->metadata)
+ if (end_io_wq->metadata == 1)
btrfs_queue_worker(&fs_info->endio_meta_write_workers,
&end_io_wq->work);
+ else if (end_io_wq->metadata == 2)
+ btrfs_queue_worker(&fs_info->endio_freespace_worker,
+ &end_io_wq->work);
else
btrfs_queue_worker(&fs_info->endio_write_workers,
&end_io_wq->work);
@@ -497,6 +500,13 @@ static void end_workqueue_bio(struct bio *bio, int err)
}
}
+/*
+ * For the metadata arg you want
+ *
+ * 0 - if data
+ * 1 - if normal metadta
+ * 2 - if writing to the free space cache area
+ */
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
int metadata)
{
@@ -533,11 +543,9 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
static void run_one_async_start(struct btrfs_work *work)
{
- struct btrfs_fs_info *fs_info;
struct async_submit_bio *async;
async = container_of(work, struct async_submit_bio, work);
- fs_info = BTRFS_I(async->inode)->root->fs_info;
async->submit_bio_start(async->inode, async->rw, async->bio,
async->mirror_num, async->bio_flags,
async->bio_offset);
@@ -688,6 +696,27 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
__btree_submit_bio_done);
}
+#ifdef CONFIG_MIGRATION
+static int btree_migratepage(struct address_space *mapping,
+ struct page *newpage, struct page *page)
+{
+ /*
+ * we can't safely write a btree page from here,
+ * we haven't done the locking hook
+ */
+ if (PageDirty(page))
+ return -EAGAIN;
+ /*
+ * Buffers may be managed in a filesystem specific way.
+ * We must have no buffers or drop them.
+ */
+ if (page_has_private(page) &&
+ !try_to_release_page(page, GFP_KERNEL))
+ return -EAGAIN;
+ return migrate_page(mapping, newpage, page);
+}
+#endif
+
static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
struct extent_io_tree *tree;
@@ -702,8 +731,7 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc)
}
redirty_page_for_writepage(wbc, page);
- eb = btrfs_find_tree_block(root, page_offset(page),
- PAGE_CACHE_SIZE);
+ eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE);
WARN_ON(!eb);
was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
@@ -794,6 +822,9 @@ static const struct address_space_operations btree_aops = {
.releasepage = btree_releasepage,
.invalidatepage = btree_invalidatepage,
.sync_page = block_sync_page,
+#ifdef CONFIG_MIGRATION
+ .migratepage = btree_migratepage,
+#endif
};
int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
@@ -850,12 +881,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
u32 blocksize, u64 parent_transid)
{
struct extent_buffer *buf = NULL;
- struct inode *btree_inode = root->fs_info->btree_inode;
- struct extent_io_tree *io_tree;
int ret;
- io_tree = &BTRFS_I(btree_inode)->io_tree;
-
buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
if (!buf)
return NULL;
@@ -980,7 +1007,10 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
blocksize, generation);
- BUG_ON(!root->node);
+ if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) {
+ free_extent_buffer(root->node);
+ return -EIO;
+ }
root->commit_root = btrfs_root_node(root);
return 0;
}
@@ -1377,7 +1407,6 @@ static int bio_ready_for_csum(struct bio *bio)
u64 start = 0;
struct page *page;
struct extent_io_tree *io_tree = NULL;
- struct btrfs_fs_info *info = NULL;
struct bio_vec *bvec;
int i;
int ret;
@@ -1396,7 +1425,6 @@ static int bio_ready_for_csum(struct bio *bio)
buf_len = page->private >> 2;
start = page_offset(page) + bvec->bv_offset;
io_tree = &BTRFS_I(page->mapping->host)->io_tree;
- info = BTRFS_I(page->mapping->host)->root->fs_info;
}
/* are we fully contained in this bio? */
if (buf_len <= length)
@@ -1539,10 +1567,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
GFP_NOFS);
struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
GFP_NOFS);
- struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root),
- GFP_NOFS);
- struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info),
- GFP_NOFS);
+ struct btrfs_root *tree_root = btrfs_sb(sb);
+ struct btrfs_fs_info *fs_info = tree_root->fs_info;
struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
GFP_NOFS);
struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),
@@ -1680,12 +1706,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->transaction_throttle);
init_waitqueue_head(&fs_info->transaction_wait);
+ init_waitqueue_head(&fs_info->transaction_blocked_wait);
init_waitqueue_head(&fs_info->async_submit_wait);
__setup_root(4096, 4096, 4096, 4096, tree_root,
fs_info, BTRFS_ROOT_TREE_OBJECTID);
-
bh = btrfs_read_dev_super(fs_devices->latest_bdev);
if (!bh)
goto fail_iput;
@@ -1775,6 +1801,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
fs_info->thread_pool_size,
&fs_info->generic_worker);
+ btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
+ 1, &fs_info->generic_worker);
/*
* endios are largely parallel and should have a very
@@ -1795,6 +1823,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_start_workers(&fs_info->endio_meta_workers, 1);
btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
btrfs_start_workers(&fs_info->endio_write_workers, 1);
+ btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -1993,6 +2022,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
if (!(sb->s_flags & MS_RDONLY)) {
down_read(&fs_info->cleanup_work_sem);
btrfs_orphan_cleanup(fs_info->fs_root);
+ btrfs_orphan_cleanup(fs_info->tree_root);
up_read(&fs_info->cleanup_work_sem);
}
@@ -2035,6 +2065,7 @@ fail_sb_buffer:
btrfs_stop_workers(&fs_info->endio_meta_workers);
btrfs_stop_workers(&fs_info->endio_meta_write_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
+ btrfs_stop_workers(&fs_info->endio_freespace_worker);
btrfs_stop_workers(&fs_info->submit_workers);
fail_iput:
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
@@ -2410,6 +2441,7 @@ int close_ctree(struct btrfs_root *root)
fs_info->closing = 1;
smp_mb();
+ btrfs_put_block_group_cache(fs_info);
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
ret = btrfs_commit_super(root);
if (ret)
@@ -2456,6 +2488,7 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->endio_meta_workers);
btrfs_stop_workers(&fs_info->endio_meta_write_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
+ btrfs_stop_workers(&fs_info->endio_freespace_worker);
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_close_devices(fs_info->fs_devices);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 951ef09..0ccf9a8 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -110,7 +110,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
dentry = d_obtain_alias(inode);
if (!IS_ERR(dentry))
- dentry->d_op = &btrfs_dentry_operations;
+ d_set_d_op(dentry, &btrfs_dentry_operations);
return dentry;
fail:
srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -166,7 +166,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
static struct dentry *btrfs_get_parent(struct dentry *child)
{
struct inode *dir = child->d_inode;
- static struct dentry *dentry;
+ struct dentry *dentry;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_path *path;
struct extent_buffer *leaf;
@@ -225,16 +225,92 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
key.offset = 0;
dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL));
if (!IS_ERR(dentry))
- dentry->d_op = &btrfs_dentry_operations;
+ d_set_d_op(dentry, &btrfs_dentry_operations);
return dentry;
fail:
btrfs_free_path(path);
return ERR_PTR(ret);
}
+static int btrfs_get_name(struct dentry *parent, char *name,
+ struct dentry *child)
+{
+ struct inode *inode = child->d_inode;
+ struct inode *dir = parent->d_inode;
+ struct btrfs_path *path;
+ struct btrfs_root *root = BTRFS_I(dir)->root;
+ struct btrfs_inode_ref *iref;
+ struct btrfs_root_ref *rref;
+ struct extent_buffer *leaf;
+ unsigned long name_ptr;
+ struct btrfs_key key;
+ int name_len;
+ int ret;
+
+ if (!dir || !inode)
+ return -EINVAL;
+
+ if (!S_ISDIR(dir->i_mode))
+ return -EINVAL;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ path->leave_spinning = 1;
+
+ if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+ key.objectid = BTRFS_I(inode)->root->root_key.objectid;
+ key.type = BTRFS_ROOT_BACKREF_KEY;
+ key.offset = (u64)-1;
+ root = root->fs_info->tree_root;
+ } else {
+ key.objectid = inode->i_ino;
+ key.offset = dir->i_ino;
+ key.type = BTRFS_INODE_REF_KEY;
+ }
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ btrfs_free_path(path);
+ return ret;
+ } else if (ret > 0) {
+ if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+ path->slots[0]--;
+ } else {
+ btrfs_free_path(path);
+ return -ENOENT;
+ }
+ }
+ leaf = path->nodes[0];
+
+ if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+ rref = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_root_ref);
+ name_ptr = (unsigned long)(rref + 1);
+ name_len = btrfs_root_ref_name_len(leaf, rref);
+ } else {
+ iref = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_inode_ref);
+ name_ptr = (unsigned long)(iref + 1);
+ name_len = btrfs_inode_ref_name_len(leaf, iref);
+ }
+
+ read_extent_buffer(leaf, name, name_ptr, name_len);
+ btrfs_free_path(path);
+
+ /*
+ * have to add the null termination to make sure that reconnect_path
+ * gets the right len for strlen
+ */
+ name[name_len] = '\0';
+
+ return 0;
+}
+
const struct export_operations btrfs_export_ops = {
.encode_fh = btrfs_encode_fh,
.fh_to_dentry = btrfs_fh_to_dentry,
.fh_to_parent = btrfs_fh_to_parent,
.get_parent = btrfs_get_parent,
+ .get_name = btrfs_get_name,
};
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0b81ecd..227e581 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -242,6 +242,12 @@ get_caching_control(struct btrfs_block_group_cache *cache)
return NULL;
}
+ /* We're loading it the fast way, so we don't have a caching_ctl. */
+ if (!cache->caching_ctl) {
+ spin_unlock(&cache->lock);
+ return NULL;
+ }
+
ctl = cache->caching_ctl;
atomic_inc(&ctl->count);
spin_unlock(&cache->lock);
@@ -421,7 +427,10 @@ err:
return 0;
}
-static int cache_block_group(struct btrfs_block_group_cache *cache)
+static int cache_block_group(struct btrfs_block_group_cache *cache,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ int load_cache_only)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_caching_control *caching_ctl;
@@ -432,6 +441,39 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
if (cache->cached != BTRFS_CACHE_NO)
return 0;
+ /*
+ * We can't do the read from on-disk cache during a commit since we need
+ * to have the normal tree locking. Also if we are currently trying to
+ * allocate blocks for the tree root we can't do the fast caching since
+ * we likely hold important locks.
+ */
+ if (!trans->transaction->in_commit &&
+ (root && root != root->fs_info->tree_root)) {
+ spin_lock(&cache->lock);
+ if (cache->cached != BTRFS_CACHE_NO) {
+ spin_unlock(&cache->lock);
+ return 0;
+ }
+ cache->cached = BTRFS_CACHE_STARTED;
+ spin_unlock(&cache->lock);
+
+ ret = load_free_space_cache(fs_info, cache);
+
+ spin_lock(&cache->lock);
+ if (ret == 1) {
+ cache->cached = BTRFS_CACHE_FINISHED;
+ cache->last_byte_to_unpin = (u64)-1;
+ } else {
+ cache->cached = BTRFS_CACHE_NO;
+ }
+ spin_unlock(&cache->lock);
+ if (ret == 1)
+ return 0;
+ }
+
+ if (load_cache_only)
+ return 0;
+
caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
BUG_ON(!caching_ctl);
@@ -509,7 +551,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
- if (found->flags == flags) {
+ if (found->flags & flags) {
rcu_read_unlock();
return found;
}
@@ -542,6 +584,15 @@ static u64 div_factor(u64 num, int factor)
return num;
}
+static u64 div_factor_fine(u64 num, int factor)
+{
+ if (factor == 100)
+ return num;
+ num *= factor;
+ do_div(num, 100);
+ return num;
+}
+
u64 btrfs_find_block_group(struct btrfs_root *root,
u64 search_start, u64 search_hint, int owner)
{
@@ -2687,6 +2738,111 @@ next_block_group(struct btrfs_root *root,
return cache;
}
+static int cache_save_setup(struct btrfs_block_group_cache *block_group,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_path *path)
+{
+ struct btrfs_root *root = block_group->fs_info->tree_root;
+ struct inode *inode = NULL;
+ u64 alloc_hint = 0;
+ int dcs = BTRFS_DC_ERROR;
+ int num_pages = 0;
+ int retries = 0;
+ int ret = 0;
+
+ /*
+ * If this block group is smaller than 100 megs don't bother caching the
+ * block group.
+ */
+ if (block_group->key.offset < (100 * 1024 * 1024)) {
+ spin_lock(&block_group->lock);
+ block_group->disk_cache_state = BTRFS_DC_WRITTEN;
+ spin_unlock(&block_group->lock);
+ return 0;
+ }
+
+again:
+ inode = lookup_free_space_inode(root, block_group, path);
+ if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
+ ret = PTR_ERR(inode);
+ btrfs_release_path(root, path);
+ goto out;
+ }
+
+ if (IS_ERR(inode)) {
+ BUG_ON(retries);
+ retries++;
+
+ if (block_group->ro)
+ goto out_free;
+
+ ret = create_free_space_inode(root, trans, block_group, path);
+ if (ret)
+ goto out_free;
+ goto again;
+ }
+
+ /*
+ * We want to set the generation to 0, that way if anything goes wrong
+ * from here on out we know not to trust this cache when we load up next
+ * time.
+ */
+ BTRFS_I(inode)->generation = 0;
+ ret = btrfs_update_inode(trans, root, inode);
+ WARN_ON(ret);
+
+ if (i_size_read(inode) > 0) {
+ ret = btrfs_truncate_free_space_cache(root, trans, path,
+ inode);
+ if (ret)
+ goto out_put;
+ }
+
+ spin_lock(&block_group->lock);
+ if (block_group->cached != BTRFS_CACHE_FINISHED) {
+ /* We're not cached, don't bother trying to write stuff out */
+ dcs = BTRFS_DC_WRITTEN;
+ spin_unlock(&block_group->lock);
+ goto out_put;
+ }
+ spin_unlock(&block_group->lock);
+
+ num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
+ if (!num_pages)
+ num_pages = 1;
+
+ /*
+ * Just to make absolutely sure we have enough space, we're going to
+ * preallocate 12 pages worth of space for each block group. In
+ * practice we ought to use at most 8, but we need extra space so we can
+ * add our header and have a terminator between the extents and the
+ * bitmaps.
+ */
+ num_pages *= 16;
+ num_pages *= PAGE_CACHE_SIZE;
+
+ ret = btrfs_check_data_free_space(inode, num_pages);
+ if (ret)
+ goto out_put;
+
+ ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
+ num_pages, num_pages,
+ &alloc_hint);
+ if (!ret)
+ dcs = BTRFS_DC_SETUP;
+ btrfs_free_reserved_data_space(inode, num_pages);
+out_put:
+ iput(inode);
+out_free:
+ btrfs_release_path(root, path);
+out:
+ spin_lock(&block_group->lock);
+ block_group->disk_cache_state = dcs;
+ spin_unlock(&block_group->lock);
+
+ return ret;
+}
+
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
@@ -2699,6 +2855,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
+again:
+ while (1) {
+ cache = btrfs_lookup_first_block_group(root->fs_info, last);
+ while (cache) {
+ if (cache->disk_cache_state == BTRFS_DC_CLEAR)
+ break;
+ cache = next_block_group(root, cache);
+ }
+ if (!cache) {
+ if (last == 0)
+ break;
+ last = 0;
+ continue;
+ }
+ err = cache_save_setup(cache, trans, path);
+ last = cache->key.objectid + cache->key.offset;
+ btrfs_put_block_group(cache);
+ }
+
while (1) {
if (last == 0) {
err = btrfs_run_delayed_refs(trans, root,
@@ -2708,6 +2883,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
cache = btrfs_lookup_first_block_group(root->fs_info, last);
while (cache) {
+ if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
+ btrfs_put_block_group(cache);
+ goto again;
+ }
+
if (cache->dirty)
break;
cache = next_block_group(root, cache);
@@ -2719,6 +2899,8 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
continue;
}
+ if (cache->disk_cache_state == BTRFS_DC_SETUP)
+ cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
cache->dirty = 0;
last = cache->key.objectid + cache->key.offset;
@@ -2727,6 +2909,52 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
btrfs_put_block_group(cache);
}
+ while (1) {
+ /*
+ * I don't think this is needed since we're just marking our
+ * preallocated extent as written, but just in case it can't
+ * hurt.
+ */
+ if (last == 0) {
+ err = btrfs_run_delayed_refs(trans, root,
+ (unsigned long)-1);
+ BUG_ON(err);
+ }
+
+ cache = btrfs_lookup_first_block_group(root->fs_info, last);
+ while (cache) {
+ /*
+ * Really this shouldn't happen, but it could if we
+ * couldn't write the entire preallocated extent and
+ * splitting the extent resulted in a new block.
+ */
+ if (cache->dirty) {
+ btrfs_put_block_group(cache);
+ goto again;
+ }
+ if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+ break;
+ cache = next_block_group(root, cache);
+ }
+ if (!cache) {
+ if (last == 0)
+ break;
+ last = 0;
+ continue;
+ }
+
+ btrfs_write_out_cache(root, trans, cache, path);
+
+ /*
+ * If we didn't have an error then the cache state is still
+ * NEED_WRITE, so we can set it to WRITTEN.
+ */
+ if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+ cache->disk_cache_state = BTRFS_DC_WRITTEN;
+ last = cache->key.objectid + cache->key.offset;
+ btrfs_put_block_group(cache);
+ }
+
btrfs_free_path(path);
return 0;
}
@@ -2762,6 +2990,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
if (found) {
spin_lock(&found->lock);
found->total_bytes += total_bytes;
+ found->disk_total += total_bytes * factor;
found->bytes_used += bytes_used;
found->disk_used += bytes_used * factor;
found->full = 0;
@@ -2781,6 +3010,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
BTRFS_BLOCK_GROUP_SYSTEM |
BTRFS_BLOCK_GROUP_METADATA);
found->total_bytes = total_bytes;
+ found->disk_total = total_bytes * factor;
found->bytes_used = bytes_used;
found->disk_used = bytes_used * factor;
found->bytes_pinned = 0;
@@ -2813,7 +3043,13 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
{
- u64 num_devices = root->fs_info->fs_devices->rw_devices;
+ /*
+ * we add in the count of missing devices because we want
+ * to make sure that any RAID levels on a degraded FS
+ * continue to be honored.
+ */
+ u64 num_devices = root->fs_info->fs_devices->rw_devices +
+ root->fs_info->fs_devices->missing_devices;
if (num_devices == 1)
flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
@@ -2882,11 +3118,16 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
struct btrfs_space_info *data_sinfo;
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 used;
- int ret = 0, committed = 0;
+ int ret = 0, committed = 0, alloc_chunk = 1;
/* make sure bytes are sectorsize aligned */
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
+ if (root == root->fs_info->tree_root) {
+ alloc_chunk = 0;
+ committed = 1;
+ }
+
data_sinfo = BTRFS_I(inode)->space_info;
if (!data_sinfo)
goto alloc;
@@ -2905,7 +3146,7 @@ again:
* if we don't have enough free bytes in this space then we need
* to alloc a new chunk.
*/
- if (!data_sinfo->full) {
+ if (!data_sinfo->full && alloc_chunk) {
u64 alloc_target;
data_sinfo->force_alloc = 1;
@@ -2997,10 +3238,11 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
rcu_read_unlock();
}
-static int should_alloc_chunk(struct btrfs_space_info *sinfo,
- u64 alloc_bytes)
+static int should_alloc_chunk(struct btrfs_root *root,
+ struct btrfs_space_info *sinfo, u64 alloc_bytes)
{
u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+ u64 thresh;
if (sinfo->bytes_used + sinfo->bytes_reserved +
alloc_bytes + 256 * 1024 * 1024 < num_bytes)
@@ -3010,6 +3252,12 @@ static int should_alloc_chunk(struct btrfs_space_info *sinfo,
alloc_bytes < div_factor(num_bytes, 8))
return 0;
+ thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+ thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
+
+ if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
+ return 0;
+
return 1;
}
@@ -3041,13 +3289,21 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
goto out;
}
- if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
+ if (!force && !should_alloc_chunk(extent_root, space_info,
+ alloc_bytes)) {
spin_unlock(&space_info->lock);
goto out;
}
spin_unlock(&space_info->lock);
/*
+ * If we have mixed data/metadata chunks we want to make sure we keep
+ * allocating mixed chunks instead of individual chunks.
+ */
+ if (btrfs_mixed_space_info(space_info))
+ flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
+
+ /*
* if we're doing a data chunk, go ahead and make sure that
* we keep a reasonable number of metadata chunks allocated in the
* FS as well.
@@ -3072,55 +3328,25 @@ out:
return ret;
}
-static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_space_info *sinfo, u64 num_bytes)
-{
- int ret;
- int end_trans = 0;
-
- if (sinfo->full)
- return 0;
-
- spin_lock(&sinfo->lock);
- ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
- spin_unlock(&sinfo->lock);
- if (!ret)
- return 0;
-
- if (!trans) {
- trans = btrfs_join_transaction(root, 1);
- BUG_ON(IS_ERR(trans));
- end_trans = 1;
- }
-
- ret = do_chunk_alloc(trans, root->fs_info->extent_root,
- num_bytes + 2 * 1024 * 1024,
- get_alloc_profile(root, sinfo->flags), 0);
-
- if (end_trans)
- btrfs_end_transaction(trans, root);
-
- return ret == 1 ? 1 : 0;
-}
-
/*
* shrink metadata reservation for delalloc
*/
static int shrink_delalloc(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 to_reclaim)
+ struct btrfs_root *root, u64 to_reclaim, int sync)
{
struct btrfs_block_rsv *block_rsv;
+ struct btrfs_space_info *space_info;
u64 reserved;
u64 max_reclaim;
u64 reclaimed = 0;
int pause = 1;
- int ret;
+ int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
block_rsv = &root->fs_info->delalloc_block_rsv;
- spin_lock(&block_rsv->lock);
- reserved = block_rsv->reserved;
- spin_unlock(&block_rsv->lock);
+ space_info = block_rsv->space_info;
+
+ smp_mb();
+ reserved = space_info->bytes_reserved;
if (reserved == 0)
return 0;
@@ -3128,104 +3354,169 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
max_reclaim = min(reserved, to_reclaim);
while (1) {
- ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0);
- if (!ret) {
- __set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(pause);
- pause <<= 1;
- if (pause > HZ / 10)
- pause = HZ / 10;
- } else {
- pause = 1;
- }
+ /* have the flusher threads jump in and do some IO */
+ smp_mb();
+ nr_pages = min_t(unsigned long, nr_pages,
+ root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
+ writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
- spin_lock(&block_rsv->lock);
- if (reserved > block_rsv->reserved)
- reclaimed = reserved - block_rsv->reserved;
- reserved = block_rsv->reserved;
- spin_unlock(&block_rsv->lock);
+ spin_lock(&space_info->lock);
+ if (reserved > space_info->bytes_reserved)
+ reclaimed += reserved - space_info->bytes_reserved;
+ reserved = space_info->bytes_reserved;
+ spin_unlock(&space_info->lock);
if (reserved == 0 || reclaimed >= max_reclaim)
break;
if (trans && trans->transaction->blocked)
return -EAGAIN;
+
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(pause);
+ pause <<= 1;
+ if (pause > HZ / 10)
+ pause = HZ / 10;
+
}
return reclaimed >= to_reclaim;
}
-static int should_retry_reserve(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 num_bytes, int *retries)
+/*
+ * Retries tells us how many times we've called reserve_metadata_bytes. The
+ * idea is if this is the first call (retries == 0) then we will add to our
+ * reserved count if we can't make the allocation in order to hold our place
+ * while we go and try and free up space. That way for retries > 1 we don't try
+ * and add space, we just check to see if the amount of unused space is >= the
+ * total space, meaning that our reservation is valid.
+ *
+ * However if we don't intend to retry this reservation, pass -1 as retries so
+ * that it short circuits this logic.
+ */
+static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 orig_bytes, int flush)
{
struct btrfs_space_info *space_info = block_rsv->space_info;
- int ret;
+ u64 unused;
+ u64 num_bytes = orig_bytes;
+ int retries = 0;
+ int ret = 0;
+ bool reserved = false;
+ bool committed = false;
- if ((*retries) > 2)
- return -ENOSPC;
+again:
+ ret = -ENOSPC;
+ if (reserved)
+ num_bytes = 0;
- ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
- if (ret)
- return 1;
+ spin_lock(&space_info->lock);
+ unused = space_info->bytes_used + space_info->bytes_reserved +
+ space_info->bytes_pinned + space_info->bytes_readonly +
+ space_info->bytes_may_use;
- if (trans && trans->transaction->in_commit)
- return -ENOSPC;
+ /*
+ * The idea here is that we've not already over-reserved the block group
+ * then we can go ahead and save our reservation first and then start
+ * flushing if we need to. Otherwise if we've already overcommitted
+ * lets start flushing stuff first and then come back and try to make
+ * our reservation.
+ */
+ if (unused <= space_info->total_bytes) {
+ unused = space_info->total_bytes - unused;
+ if (unused >= num_bytes) {
+ if (!reserved)
+ space_info->bytes_reserved += orig_bytes;
+ ret = 0;
+ } else {
+ /*
+ * Ok set num_bytes to orig_bytes since we aren't
+ * overocmmitted, this way we only try and reclaim what
+ * we need.
+ */
+ num_bytes = orig_bytes;
+ }
+ } else {
+ /*
+ * Ok we're over committed, set num_bytes to the overcommitted
+ * amount plus the amount of bytes that we need for this
+ * reservation.
+ */
+ num_bytes = unused - space_info->total_bytes +
+ (orig_bytes * (retries + 1));
+ }
- ret = shrink_delalloc(trans, root, num_bytes);
- if (ret)
- return ret;
+ /*
+ * Couldn't make our reservation, save our place so while we're trying
+ * to reclaim space we can actually use it instead of somebody else
+ * stealing it from us.
+ */
+ if (ret && !reserved) {
+ space_info->bytes_reserved += orig_bytes;
+ reserved = true;
+ }
- spin_lock(&space_info->lock);
- if (space_info->bytes_pinned < num_bytes)
- ret = 1;
spin_unlock(&space_info->lock);
- if (ret)
- return -ENOSPC;
- (*retries)++;
-
- if (trans)
- return -EAGAIN;
+ if (!ret)
+ return 0;
- trans = btrfs_join_transaction(root, 1);
- BUG_ON(IS_ERR(trans));
- ret = btrfs_commit_transaction(trans, root);
- BUG_ON(ret);
+ if (!flush)
+ goto out;
- return 1;
-}
+ /*
+ * We do synchronous shrinking since we don't actually unreserve
+ * metadata until after the IO is completed.
+ */
+ ret = shrink_delalloc(trans, root, num_bytes, 1);
+ if (ret > 0)
+ return 0;
+ else if (ret < 0)
+ goto out;
-static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv,
- u64 num_bytes)
-{
- struct btrfs_space_info *space_info = block_rsv->space_info;
- u64 unused;
- int ret = -ENOSPC;
+ /*
+ * So if we were overcommitted it's possible that somebody else flushed
+ * out enough space and we simply didn't have enough space to reclaim,
+ * so go back around and try again.
+ */
+ if (retries < 2) {
+ retries++;
+ goto again;
+ }
spin_lock(&space_info->lock);
- unused = space_info->bytes_used + space_info->bytes_reserved +
- space_info->bytes_pinned + space_info->bytes_readonly;
+ /*
+ * Not enough space to be reclaimed, don't bother committing the
+ * transaction.
+ */
+ if (space_info->bytes_pinned < orig_bytes)
+ ret = -ENOSPC;
+ spin_unlock(&space_info->lock);
+ if (ret)
+ goto out;
- if (unused < space_info->total_bytes)
- unused = space_info->total_bytes - unused;
- else
- unused = 0;
+ ret = -EAGAIN;
+ if (trans || committed)
+ goto out;
- if (unused >= num_bytes) {
- if (block_rsv->priority >= 10) {
- space_info->bytes_reserved += num_bytes;
- ret = 0;
- } else {
- if ((unused + block_rsv->reserved) *
- block_rsv->priority >=
- (num_bytes + block_rsv->reserved) * 10) {
- space_info->bytes_reserved += num_bytes;
- ret = 0;
- }
- }
+ ret = -ENOSPC;
+ trans = btrfs_join_transaction(root, 1);
+ if (IS_ERR(trans))
+ goto out;
+ ret = btrfs_commit_transaction(trans, root);
+ if (!ret) {
+ trans = NULL;
+ committed = true;
+ goto again;
+ }
+
+out:
+ if (reserved) {
+ spin_lock(&space_info->lock);
+ space_info->bytes_reserved -= orig_bytes;
+ spin_unlock(&space_info->lock);
}
- spin_unlock(&space_info->lock);
return ret;
}
@@ -3327,18 +3618,14 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
{
struct btrfs_block_rsv *block_rsv;
struct btrfs_fs_info *fs_info = root->fs_info;
- u64 alloc_target;
block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
if (!block_rsv)
return NULL;
btrfs_init_block_rsv(block_rsv);
-
- alloc_target = btrfs_get_alloc_profile(root, 0);
block_rsv->space_info = __find_space_info(fs_info,
BTRFS_BLOCK_GROUP_METADATA);
-
return block_rsv;
}
@@ -3369,23 +3656,19 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
- u64 num_bytes, int *retries)
+ u64 num_bytes)
{
int ret;
if (num_bytes == 0)
return 0;
-again:
- ret = reserve_metadata_bytes(block_rsv, num_bytes);
+
+ ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 1);
return 0;
}
- ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
- if (ret > 0)
- goto again;
-
return ret;
}
@@ -3420,7 +3703,8 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
return 0;
if (block_rsv->refill_used) {
- ret = reserve_metadata_bytes(block_rsv, num_bytes);
+ ret = reserve_metadata_bytes(trans, root, block_rsv,
+ num_bytes, 0);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 0);
return 0;
@@ -3499,6 +3783,8 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
spin_lock(&sinfo->lock);
+ if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
+ data_used = 0;
meta_used = sinfo->bytes_used;
spin_unlock(&sinfo->lock);
@@ -3526,7 +3812,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
block_rsv->size = num_bytes;
num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
- sinfo->bytes_reserved + sinfo->bytes_readonly;
+ sinfo->bytes_reserved + sinfo->bytes_readonly +
+ sinfo->bytes_may_use;
if (sinfo->total_bytes > num_bytes) {
num_bytes = sinfo->total_bytes - num_bytes;
@@ -3597,7 +3884,7 @@ static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- int num_items, int *retries)
+ int num_items)
{
u64 num_bytes;
int ret;
@@ -3607,7 +3894,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
num_bytes = calc_trans_metadata_size(root, num_items);
ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
- num_bytes, retries);
+ num_bytes);
if (!ret) {
trans->bytes_reserved += num_bytes;
trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -3681,14 +3968,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
u64 to_reserve;
int nr_extents;
- int retries = 0;
int ret;
if (btrfs_transaction_in_commit(root->fs_info))
schedule_timeout(1);
num_bytes = ALIGN(num_bytes, root->sectorsize);
-again:
+
spin_lock(&BTRFS_I(inode)->accounting_lock);
nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
if (nr_extents > BTRFS_I(inode)->reserved_extents) {
@@ -3698,18 +3984,14 @@ again:
nr_extents = 0;
to_reserve = 0;
}
+ spin_unlock(&BTRFS_I(inode)->accounting_lock);
to_reserve += calc_csum_metadata_size(inode, num_bytes);
- ret = reserve_metadata_bytes(block_rsv, to_reserve);
- if (ret) {
- spin_unlock(&BTRFS_I(inode)->accounting_lock);
- ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
- &retries);
- if (ret > 0)
- goto again;
+ ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
+ if (ret)
return ret;
- }
+ spin_lock(&BTRFS_I(inode)->accounting_lock);
BTRFS_I(inode)->reserved_extents += nr_extents;
atomic_inc(&BTRFS_I(inode)->outstanding_extents);
spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -3717,7 +3999,7 @@ again:
block_rsv_add_bytes(block_rsv, to_reserve, 1);
if (block_rsv->size > 512 * 1024 * 1024)
- shrink_delalloc(NULL, root, to_reserve);
+ shrink_delalloc(NULL, root, to_reserve, 0);
return 0;
}
@@ -3776,12 +4058,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc)
{
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group_cache *cache = NULL;
struct btrfs_fs_info *info = root->fs_info;
- int factor;
u64 total = num_bytes;
u64 old_val;
u64 byte_in_group;
+ int factor;
/* block accounting for super block */
spin_lock(&info->delalloc_lock);
@@ -3803,11 +4085,25 @@ static int update_block_group(struct btrfs_trans_handle *trans,
factor = 2;
else
factor = 1;
+ /*
+ * If this block group has free space cache written out, we
+ * need to make sure to load it if we are removing space. This
+ * is because we need the unpinning stage to actually add the
+ * space back to the block group, otherwise we will leak space.
+ */
+ if (!alloc && cache->cached == BTRFS_CACHE_NO)
+ cache_block_group(cache, trans, NULL, 1);
+
byte_in_group = bytenr - cache->key.objectid;
WARN_ON(byte_in_group > cache->key.offset);
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
+
+ if (btrfs_super_cache_generation(&info->super_copy) != 0 &&
+ cache->disk_cache_state < BTRFS_DC_CLEAR)
+ cache->disk_cache_state = BTRFS_DC_CLEAR;
+
cache->dirty = 1;
old_val = btrfs_block_group_used(&cache->item);
num_bytes = min(total, cache->key.offset - byte_in_group);
@@ -4554,6 +4850,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
bool found_uncached_bg = false;
bool failed_cluster_refill = false;
bool failed_alloc = false;
+ bool use_cluster = true;
u64 ideal_cache_percent = 0;
u64 ideal_cache_offset = 0;
@@ -4568,16 +4865,24 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
return -ENOSPC;
}
+ /*
+ * If the space info is for both data and metadata it means we have a
+ * small filesystem and we can't use the clustering stuff.
+ */
+ if (btrfs_mixed_space_info(space_info))
+ use_cluster = false;
+
if (orig_root->ref_cows || empty_size)
allowed_chunk_alloc = 1;
- if (data & BTRFS_BLOCK_GROUP_METADATA) {
+ if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
last_ptr = &root->fs_info->meta_alloc_cluster;
if (!btrfs_test_opt(root, SSD))
empty_cluster = 64 * 1024;
}
- if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
+ if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
+ btrfs_test_opt(root, SSD)) {
last_ptr = &root->fs_info->data_alloc_cluster;
}
@@ -4637,10 +4942,34 @@ search:
btrfs_get_block_group(block_group);
search_start = block_group->key.objectid;
+ /*
+ * this can happen if we end up cycling through all the
+ * raid types, but we want to make sure we only allocate
+ * for the proper type.
+ */
+ if (!block_group_bits(block_group, data)) {
+ u64 extra = BTRFS_BLOCK_GROUP_DUP |
+ BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10;
+
+ /*
+ * if they asked for extra copies and this block group
+ * doesn't provide them, bail. This does allow us to
+ * fill raid0 from raid1.
+ */
+ if ((data & extra) && !(block_group->flags & extra))
+ goto loop;
+ }
+
have_block_group:
if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
u64 free_percent;
+ ret = cache_block_group(block_group, trans,
+ orig_root, 1);
+ if (block_group->cached == BTRFS_CACHE_FINISHED)
+ goto have_block_group;
+
free_percent = btrfs_block_group_used(&block_group->item);
free_percent *= 100;
free_percent = div64_u64(free_percent,
@@ -4661,7 +4990,8 @@ have_block_group:
if (loop > LOOP_CACHING_NOWAIT ||
(loop > LOOP_FIND_IDEAL &&
atomic_read(&space_info->caching_threads) < 2)) {
- ret = cache_block_group(block_group);
+ ret = cache_block_group(block_group, trans,
+ orig_root, 0);
BUG_ON(ret);
}
found_uncached_bg = true;
@@ -5218,7 +5548,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
u64 num_bytes = ins->offset;
block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
- cache_block_group(block_group);
+ cache_block_group(block_group, trans, NULL, 0);
caching_ctl = get_caching_control(block_group);
if (!caching_ctl) {
@@ -5308,7 +5638,8 @@ use_block_rsv(struct btrfs_trans_handle *trans,
block_rsv = get_block_rsv(trans, root);
if (block_rsv->size == 0) {
- ret = reserve_metadata_bytes(block_rsv, blocksize);
+ ret = reserve_metadata_bytes(trans, root, block_rsv,
+ blocksize, 0);
if (ret)
return ERR_PTR(ret);
return block_rsv;
@@ -5318,11 +5649,6 @@ use_block_rsv(struct btrfs_trans_handle *trans,
if (!ret)
return block_rsv;
- WARN_ON(1);
- printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
- block_rsv->size, block_rsv->reserved,
- block_rsv->freed[0], block_rsv->freed[1]);
-
return ERR_PTR(-ENOSPC);
}
@@ -5421,7 +5747,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
u64 generation;
u64 refs;
u64 flags;
- u64 last = 0;
u32 nritems;
u32 blocksize;
struct btrfs_key key;
@@ -5489,7 +5814,6 @@ reada:
generation);
if (ret)
break;
- last = bytenr + blocksize;
nread++;
}
wc->reada_slot = slot;
@@ -6009,9 +6333,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
NULL, NULL);
BUG_ON(ret < 0);
if (ret > 0) {
- ret = btrfs_del_orphan_item(trans, tree_root,
- root->root_key.objectid);
- BUG_ON(ret);
+ /* if we fail to delete the orphan item this time
+ * around, it'll get picked up the next time.
+ *
+ * The most common failure here is just -ENOENT.
+ */
+ btrfs_del_orphan_item(trans, tree_root,
+ root->root_key.objectid);
}
}
@@ -7587,7 +7915,14 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
- num_devices = root->fs_info->fs_devices->rw_devices;
+ /*
+ * we add in the count of missing devices because we want
+ * to make sure that any RAID levels on a degraded FS
+ * continue to be honored.
+ */
+ num_devices = root->fs_info->fs_devices->rw_devices +
+ root->fs_info->fs_devices->missing_devices;
+
if (num_devices == 1) {
stripped |= BTRFS_BLOCK_GROUP_DUP;
stripped = flags & ~stripped;
@@ -7813,6 +8148,40 @@ out:
return ret;
}
+void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
+{
+ struct btrfs_block_group_cache *block_group;
+ u64 last = 0;
+
+ while (1) {
+ struct inode *inode;
+
+ block_group = btrfs_lookup_first_block_group(info, last);
+ while (block_group) {
+ spin_lock(&block_group->lock);
+ if (block_group->iref)
+ break;
+ spin_unlock(&block_group->lock);
+ block_group = next_block_group(info->tree_root,
+ block_group);
+ }
+ if (!block_group) {
+ if (last == 0)
+ break;
+ last = 0;
+ continue;
+ }
+
+ inode = block_group->inode;
+ block_group->iref = 0;
+ block_group->inode = NULL;
+ spin_unlock(&block_group->lock);
+ iput(inode);
+ last = block_group->key.objectid + block_group->key.offset;
+ btrfs_put_block_group(block_group);
+ }
+}
+
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
struct btrfs_block_group_cache *block_group;
@@ -7896,6 +8265,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
struct btrfs_key key;
struct btrfs_key found_key;
struct extent_buffer *leaf;
+ int need_clear = 0;
+ u64 cache_gen;
root = info->extent_root;
key.objectid = 0;
@@ -7905,13 +8276,21 @@ int btrfs_read_block_groups(struct btrfs_root *root)
if (!path)
return -ENOMEM;
+ cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
+ if (cache_gen != 0 &&
+ btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
+ need_clear = 1;
+ if (btrfs_test_opt(root, CLEAR_CACHE))
+ need_clear = 1;
+ if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen)
+ printk(KERN_INFO "btrfs: disk space caching is enabled\n");
+
while (1) {
ret = find_first_block_group(root, path, &key);
if (ret > 0)
break;
if (ret != 0)
goto error;
-
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
cache = kzalloc(sizeof(*cache), GFP_NOFS);
@@ -7927,6 +8306,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
+ if (need_clear)
+ cache->disk_cache_state = BTRFS_DC_CLEAR;
+
/*
* we only want to have 32k of ram per block group for keeping
* track of free space, and if we pass 1/2 of that we want to
@@ -8031,6 +8413,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
cache->key.offset = size;
cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
cache->sectorsize = root->sectorsize;
+ cache->fs_info = root->fs_info;
/*
* we only want to have 32k of ram per block group for keeping track
@@ -8087,8 +8470,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
struct btrfs_block_group_cache *block_group;
struct btrfs_free_cluster *cluster;
+ struct btrfs_root *tree_root = root->fs_info->tree_root;
struct btrfs_key key;
+ struct inode *inode;
int ret;
+ int factor;
root = root->fs_info->extent_root;
@@ -8097,6 +8483,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
BUG_ON(!block_group->ro);
memcpy(&key, &block_group->key, sizeof(key));
+ if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
+ BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10))
+ factor = 2;
+ else
+ factor = 1;
/* make sure this block group isn't part of an allocation cluster */
cluster = &root->fs_info->data_alloc_cluster;
@@ -8116,6 +8508,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
BUG_ON(!path);
+ inode = lookup_free_space_inode(root, block_group, path);
+ if (!IS_ERR(inode)) {
+ btrfs_orphan_add(trans, inode);
+ clear_nlink(inode);
+ /* One for the block groups ref */
+ spin_lock(&block_group->lock);
+ if (block_group->iref) {
+ block_group->iref = 0;
+ block_group->inode = NULL;
+ spin_unlock(&block_group->lock);
+ iput(inode);
+ } else {
+ spin_unlock(&block_group->lock);
+ }
+ /* One for our lookup ref */
+ iput(inode);
+ }
+
+ key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+ key.offset = block_group->key.objectid;
+ key.type = 0;
+
+ ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
+ if (ret < 0)
+ goto out;
+ if (ret > 0)
+ btrfs_release_path(tree_root, path);
+ if (ret == 0) {
+ ret = btrfs_del_item(trans, tree_root, path);
+ if (ret)
+ goto out;
+ btrfs_release_path(tree_root, path);
+ }
+
spin_lock(&root->fs_info->block_group_cache_lock);
rb_erase(&block_group->cache_node,
&root->fs_info->block_group_cache_tree);
@@ -8137,8 +8563,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_lock(&block_group->space_info->lock);
block_group->space_info->total_bytes -= block_group->key.offset;
block_group->space_info->bytes_readonly -= block_group->key.offset;
+ block_group->space_info->disk_total -= block_group->key.offset * factor;
spin_unlock(&block_group->space_info->lock);
+ memcpy(&key, &block_group->key, sizeof(key));
+
btrfs_clear_space_info_full(root->fs_info);
btrfs_put_block_group(block_group);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d74e6af..3e86b9f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -104,7 +104,7 @@ void extent_io_tree_init(struct extent_io_tree *tree,
struct address_space *mapping, gfp_t mask)
{
tree->state = RB_ROOT;
- tree->buffer = RB_ROOT;
+ INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
tree->ops = NULL;
tree->dirty_bytes = 0;
spin_lock_init(&tree->lock);
@@ -235,50 +235,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree,
return ret;
}
-static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
- u64 offset, struct rb_node *node)
-{
- struct rb_root *root = &tree->buffer;
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
- struct extent_buffer *eb;
-
- while (*p) {
- parent = *p;
- eb = rb_entry(parent, struct extent_buffer, rb_node);
-
- if (offset < eb->start)
- p = &(*p)->rb_left;
- else if (offset > eb->start)
- p = &(*p)->rb_right;
- else
- return eb;
- }
-
- rb_link_node(node, parent, p);
- rb_insert_color(node, root);
- return NULL;
-}
-
-static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
- u64 offset)
-{
- struct rb_root *root = &tree->buffer;
- struct rb_node *n = root->rb_node;
- struct extent_buffer *eb;
-
- while (n) {
- eb = rb_entry(n, struct extent_buffer, rb_node);
- if (offset < eb->start)
- n = n->rb_left;
- else if (offset > eb->start)
- n = n->rb_right;
- else
- return eb;
- }
- return NULL;
-}
-
static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
struct extent_state *other)
{
@@ -1872,9 +1828,9 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
bio_put(bio);
}
-static struct bio *
-extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
- gfp_t gfp_flags)
+struct bio *
+btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
+ gfp_t gfp_flags)
{
struct bio *bio;
@@ -1901,10 +1857,8 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
struct page *page = bvec->bv_page;
struct extent_io_tree *tree = bio->bi_private;
u64 start;
- u64 end;
start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
- end = start + bvec->bv_len - 1;
bio->bi_private = NULL;
@@ -1965,7 +1919,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
else
nr = bio_get_nr_vecs(bdev);
- bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+ bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
bio_add_page(bio, page, page_size, offset);
bio->bi_end_io = end_io_func;
@@ -2204,7 +2158,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
u64 last_byte = i_size_read(inode);
u64 block_start;
u64 iosize;
- u64 unlock_start;
sector_t sector;
struct extent_state *cached_state = NULL;
struct extent_map *em;
@@ -2329,7 +2282,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, start,
page_end, NULL, 1);
- unlock_start = page_end + 1;
goto done;
}
@@ -2340,7 +2292,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, cur,
page_end, NULL, 1);
- unlock_start = page_end + 1;
break;
}
em = epd->get_extent(inode, page, pg_offset, cur,
@@ -2387,7 +2338,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
cur += iosize;
pg_offset += iosize;
- unlock_start = cur;
continue;
}
/* leave this out until we have a page_mkwrite call */
@@ -2473,7 +2423,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
pgoff_t index;
pgoff_t end; /* Inclusive */
int scanned = 0;
- int range_whole = 0;
pagevec_init(&pvec, 0);
if (wbc->range_cyclic) {
@@ -2482,8 +2431,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
} else {
index = wbc->range_start >> PAGE_CACHE_SHIFT;
end = wbc->range_end >> PAGE_CACHE_SHIFT;
- if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
- range_whole = 1;
scanned = 1;
}
retry:
@@ -2823,6 +2770,8 @@ int extent_prepare_write(struct extent_io_tree *tree,
NULL, 1,
end_bio_extent_preparewrite, 0,
0, 0);
+ if (ret && !err)
+ err = ret;
iocount++;
block_start = block_start + iosize;
} else {
@@ -2952,21 +2901,53 @@ out:
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent)
{
- int ret;
+ int ret = 0;
u64 off = start;
u64 max = start + len;
u32 flags = 0;
+ u32 found_type;
+ u64 last;
u64 disko = 0;
+ struct btrfs_key found_key;
struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
+ struct btrfs_path *path;
+ struct btrfs_file_extent_item *item;
int end = 0;
u64 em_start = 0, em_len = 0;
unsigned long emflags;
- ret = 0;
+ int hole = 0;
if (len == 0)
return -EINVAL;
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ path->leave_spinning = 1;
+
+ ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
+ path, inode->i_ino, -1, 0);
+ if (ret < 0) {
+ btrfs_free_path(path);
+ return ret;
+ }
+ WARN_ON(!ret);
+ path->slots[0]--;
+ item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_file_extent_item);
+ btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
+ found_type = btrfs_key_type(&found_key);
+
+ /* No extents, just return */
+ if (found_key.objectid != inode->i_ino ||
+ found_type != BTRFS_EXTENT_DATA_KEY) {
+ btrfs_free_path(path);
+ return 0;
+ }
+ last = found_key.offset;
+ btrfs_free_path(path);
+
lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
&cached_state, GFP_NOFS);
em = get_extent(inode, NULL, 0, off, max - off, 0);
@@ -2976,11 +2957,18 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
ret = PTR_ERR(em);
goto out;
}
+
while (!end) {
+ hole = 0;
off = em->start + em->len;
if (off >= max)
end = 1;
+ if (em->block_start == EXTENT_MAP_HOLE) {
+ hole = 1;
+ goto next;
+ }
+
em_start = em->start;
em_len = em->len;
@@ -2990,8 +2978,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (em->block_start == EXTENT_MAP_LAST_BYTE) {
end = 1;
flags |= FIEMAP_EXTENT_LAST;
- } else if (em->block_start == EXTENT_MAP_HOLE) {
- flags |= FIEMAP_EXTENT_UNWRITTEN;
} else if (em->block_start == EXTENT_MAP_INLINE) {
flags |= (FIEMAP_EXTENT_DATA_INLINE |
FIEMAP_EXTENT_NOT_ALIGNED);
@@ -3004,10 +2990,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
flags |= FIEMAP_EXTENT_ENCODED;
+next:
emflags = em->flags;
free_extent_map(em);
em = NULL;
-
if (!end) {
em = get_extent(inode, NULL, 0, off, max - off, 0);
if (!em)
@@ -3018,15 +3004,23 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
emflags = em->flags;
}
+
if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
flags |= FIEMAP_EXTENT_LAST;
end = 1;
}
- ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
- em_len, flags);
- if (ret)
- goto out_free;
+ if (em_start == last) {
+ flags |= FIEMAP_EXTENT_LAST;
+ end = 1;
+ }
+
+ if (!hole) {
+ ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
+ em_len, flags);
+ if (ret)
+ goto out_free;
+ }
}
out_free:
free_extent_map(em);
@@ -3104,6 +3098,39 @@ static void __free_extent_buffer(struct extent_buffer *eb)
kmem_cache_free(extent_buffer_cache, eb);
}
+/*
+ * Helper for releasing extent buffer page.
+ */
+static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
+ unsigned long start_idx)
+{
+ unsigned long index;
+ struct page *page;
+
+ if (!eb->first_page)
+ return;
+
+ index = num_extent_pages(eb->start, eb->len);
+ if (start_idx >= index)
+ return;
+
+ do {
+ index--;
+ page = extent_buffer_page(eb, index);
+ if (page)
+ page_cache_release(page);
+ } while (index != start_idx);
+}
+
+/*
+ * Helper for releasing the extent buffer.
+ */
+static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
+{
+ btrfs_release_extent_buffer_page(eb, 0);
+ __free_extent_buffer(eb);
+}
+
struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
u64 start, unsigned long len,
struct page *page0,
@@ -3117,16 +3144,16 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
struct page *p;
struct address_space *mapping = tree->mapping;
int uptodate = 1;
+ int ret;
- spin_lock(&tree->buffer_lock);
- eb = buffer_search(tree, start);
- if (eb) {
- atomic_inc(&eb->refs);
- spin_unlock(&tree->buffer_lock);
+ rcu_read_lock();
+ eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+ if (eb && atomic_inc_not_zero(&eb->refs)) {
+ rcu_read_unlock();
mark_page_accessed(eb->first_page);
return eb;
}
- spin_unlock(&tree->buffer_lock);
+ rcu_read_unlock();
eb = __alloc_extent_buffer(tree, start, len, mask);
if (!eb)
@@ -3165,26 +3192,31 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
if (uptodate)
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+ ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+ if (ret)
+ goto free_eb;
+
spin_lock(&tree->buffer_lock);
- exists = buffer_tree_insert(tree, start, &eb->rb_node);
- if (exists) {
+ ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
+ if (ret == -EEXIST) {
+ exists = radix_tree_lookup(&tree->buffer,
+ start >> PAGE_CACHE_SHIFT);
/* add one reference for the caller */
atomic_inc(&exists->refs);
spin_unlock(&tree->buffer_lock);
+ radix_tree_preload_end();
goto free_eb;
}
/* add one reference for the tree */
atomic_inc(&eb->refs);
spin_unlock(&tree->buffer_lock);
+ radix_tree_preload_end();
return eb;
free_eb:
if (!atomic_dec_and_test(&eb->refs))
return exists;
- for (index = 1; index < i; index++)
- page_cache_release(extent_buffer_page(eb, index));
- page_cache_release(extent_buffer_page(eb, 0));
- __free_extent_buffer(eb);
+ btrfs_release_extent_buffer(eb);
return exists;
}
@@ -3194,16 +3226,16 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
{
struct extent_buffer *eb;
- spin_lock(&tree->buffer_lock);
- eb = buffer_search(tree, start);
- if (eb)
- atomic_inc(&eb->refs);
- spin_unlock(&tree->buffer_lock);
-
- if (eb)
+ rcu_read_lock();
+ eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+ if (eb && atomic_inc_not_zero(&eb->refs)) {
+ rcu_read_unlock();
mark_page_accessed(eb->first_page);
+ return eb;
+ }
+ rcu_read_unlock();
- return eb;
+ return NULL;
}
void free_extent_buffer(struct extent_buffer *eb)
@@ -3833,34 +3865,47 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
}
}
+static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
+{
+ struct extent_buffer *eb =
+ container_of(head, struct extent_buffer, rcu_head);
+
+ btrfs_release_extent_buffer(eb);
+}
+
int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
{
u64 start = page_offset(page);
struct extent_buffer *eb;
int ret = 1;
- unsigned long i;
- unsigned long num_pages;
spin_lock(&tree->buffer_lock);
- eb = buffer_search(tree, start);
- if (!eb)
- goto out;
+ eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+ if (!eb) {
+ spin_unlock(&tree->buffer_lock);
+ return ret;
+ }
- if (atomic_read(&eb->refs) > 1) {
+ if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
ret = 0;
goto out;
}
- if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+
+ /*
+ * set @eb->refs to 0 if it is already 1, and then release the @eb.
+ * Or go back.
+ */
+ if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
ret = 0;
goto out;
}
- /* at this point we can safely release the extent buffer */
- num_pages = num_extent_pages(eb->start, eb->len);
- for (i = 0; i < num_pages; i++)
- page_cache_release(extent_buffer_page(eb, i));
- rb_erase(&eb->rb_node, &tree->buffer);
- __free_extent_buffer(eb);
+
+ radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
out:
spin_unlock(&tree->buffer_lock);
+
+ /* at this point we can safely release the extent buffer */
+ if (atomic_read(&eb->refs) == 0)
+ call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
return ret;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5691c7b..4183c81 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -85,7 +85,7 @@ struct extent_io_ops {
struct extent_io_tree {
struct rb_root state;
- struct rb_root buffer;
+ struct radix_tree_root buffer;
struct address_space *mapping;
u64 dirty_bytes;
spinlock_t lock;
@@ -123,7 +123,7 @@ struct extent_buffer {
unsigned long bflags;
atomic_t refs;
struct list_head leak_list;
- struct rb_node rb_node;
+ struct rcu_head rcu_head;
/* the spinlock is used to protect most operations */
spinlock_t lock;
@@ -310,4 +310,7 @@ int extent_clear_unlock_delalloc(struct inode *inode,
struct extent_io_tree *tree,
u64 start, u64 end, struct page *locked_page,
unsigned long op);
+struct bio *
+btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
+ gfp_t gfp_flags);
#endif
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 454ca52..23cb8da 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -335,7 +335,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
goto out;
}
if (IS_ERR(rb_node)) {
- em = ERR_PTR(PTR_ERR(rb_node));
+ em = ERR_CAST(rb_node);
goto out;
}
em = rb_entry(rb_node, struct extent_map, rb_node);
@@ -384,7 +384,7 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
goto out;
}
if (IS_ERR(rb_node)) {
- em = ERR_PTR(PTR_ERR(rb_node));
+ em = ERR_CAST(rb_node);
goto out;
}
em = rb_entry(rb_node, struct extent_map, rb_node);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e354c33..66836d8 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -48,30 +48,34 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
struct page **prepared_pages,
struct iov_iter *i)
{
- size_t copied;
+ size_t copied = 0;
int pg = 0;
int offset = pos & (PAGE_CACHE_SIZE - 1);
+ int total_copied = 0;
while (write_bytes > 0) {
size_t count = min_t(size_t,
PAGE_CACHE_SIZE - offset, write_bytes);
struct page *page = prepared_pages[pg];
-again:
- if (unlikely(iov_iter_fault_in_readable(i, count)))
- return -EFAULT;
-
- /* Copy data from userspace to the current page */
- copied = iov_iter_copy_from_user(page, i, offset, count);
+ /*
+ * Copy data from userspace to the current page
+ *
+ * Disable pagefault to avoid recursive lock since
+ * the pages are already locked
+ */
+ pagefault_disable();
+ copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
+ pagefault_enable();
/* Flush processor's dcache for this page */
flush_dcache_page(page);
iov_iter_advance(i, copied);
write_bytes -= copied;
+ total_copied += copied;
+ /* Return to btrfs_file_aio_write to fault page */
if (unlikely(copied == 0)) {
- count = min_t(size_t, PAGE_CACHE_SIZE - offset,
- iov_iter_single_seg_count(i));
- goto again;
+ break;
}
if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
@@ -81,7 +85,7 @@ again:
offset = 0;
}
}
- return 0;
+ return total_copied;
}
/*
@@ -854,6 +858,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
unsigned long last_index;
int will_write;
int buffered = 0;
+ int copied = 0;
+ int dirty_pages = 0;
will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
(file->f_flags & O_DIRECT));
@@ -970,7 +976,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
WARN_ON(num_pages > nrptrs);
memset(pages, 0, sizeof(struct page *) * nrptrs);
- ret = btrfs_delalloc_reserve_space(inode, write_bytes);
+ /*
+ * Fault pages before locking them in prepare_pages
+ * to avoid recursive lock
+ */
+ if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ ret = btrfs_delalloc_reserve_space(inode,
+ num_pages << PAGE_CACHE_SHIFT);
if (ret)
goto out;
@@ -978,37 +994,49 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
pos, first_index, last_index,
write_bytes);
if (ret) {
- btrfs_delalloc_release_space(inode, write_bytes);
+ btrfs_delalloc_release_space(inode,
+ num_pages << PAGE_CACHE_SHIFT);
goto out;
}
- ret = btrfs_copy_from_user(pos, num_pages,
+ copied = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, &i);
- if (ret == 0) {
+ dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT;
+
+ if (num_pages > dirty_pages) {
+ if (copied > 0)
+ atomic_inc(
+ &BTRFS_I(inode)->outstanding_extents);
+ btrfs_delalloc_release_space(inode,
+ (num_pages - dirty_pages) <<
+ PAGE_CACHE_SHIFT);
+ }
+
+ if (copied > 0) {
dirty_and_release_pages(NULL, root, file, pages,
- num_pages, pos, write_bytes);
+ dirty_pages, pos, copied);
}
btrfs_drop_pages(pages, num_pages);
- if (ret) {
- btrfs_delalloc_release_space(inode, write_bytes);
- goto out;
- }
- if (will_write) {
- filemap_fdatawrite_range(inode->i_mapping, pos,
- pos + write_bytes - 1);
- } else {
- balance_dirty_pages_ratelimited_nr(inode->i_mapping,
- num_pages);
- if (num_pages <
- (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
- btrfs_btree_balance_dirty(root, 1);
- btrfs_throttle(root);
+ if (copied > 0) {
+ if (will_write) {
+ filemap_fdatawrite_range(inode->i_mapping, pos,
+ pos + copied - 1);
+ } else {
+ balance_dirty_pages_ratelimited_nr(
+ inode->i_mapping,
+ dirty_pages);
+ if (dirty_pages <
+ (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
+ btrfs_btree_balance_dirty(root, 1);
+ btrfs_throttle(root);
+ }
}
- pos += write_bytes;
- num_written += write_bytes;
+ pos += copied;
+ num_written += copied;
cond_resched();
}
@@ -1047,8 +1075,14 @@ out:
if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ num_written = PTR_ERR(trans);
+ goto done;
+ }
+ mutex_lock(&inode->i_mutex);
ret = btrfs_log_dentry_safe(trans, root,
file->f_dentry);
+ mutex_unlock(&inode->i_mutex);
if (ret == 0) {
ret = btrfs_sync_log(trans, root);
if (ret == 0)
@@ -1067,6 +1101,7 @@ out:
(start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
}
}
+done:
current->backing_dev_info = NULL;
return num_written ? num_written : err;
}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index f488fac..60d6842 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -23,10 +23,763 @@
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
+#include "disk-io.h"
#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
+static void recalculate_thresholds(struct btrfs_block_group_cache
+ *block_group);
+static int link_free_space(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_space *info);
+
+struct inode *lookup_free_space_inode(struct btrfs_root *root,
+ struct btrfs_block_group_cache
+ *block_group, struct btrfs_path *path)
+{
+ struct btrfs_key key;
+ struct btrfs_key location;
+ struct btrfs_disk_key disk_key;
+ struct btrfs_free_space_header *header;
+ struct extent_buffer *leaf;
+ struct inode *inode = NULL;
+ int ret;
+
+ spin_lock(&block_group->lock);
+ if (block_group->inode)
+ inode = igrab(block_group->inode);
+ spin_unlock(&block_group->lock);
+ if (inode)
+ return inode;
+
+ key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+ key.offset = block_group->key.objectid;
+ key.type = 0;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ if (ret > 0) {
+ btrfs_release_path(root, path);
+ return ERR_PTR(-ENOENT);
+ }
+
+ leaf = path->nodes[0];
+ header = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_free_space_header);
+ btrfs_free_space_key(leaf, header, &disk_key);
+ btrfs_disk_key_to_cpu(&location, &disk_key);
+ btrfs_release_path(root, path);
+
+ inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
+ if (!inode)
+ return ERR_PTR(-ENOENT);
+ if (IS_ERR(inode))
+ return inode;
+ if (is_bad_inode(inode)) {
+ iput(inode);
+ return ERR_PTR(-ENOENT);
+ }
+
+ spin_lock(&block_group->lock);
+ if (!root->fs_info->closing) {
+ block_group->inode = igrab(inode);
+ block_group->iref = 1;
+ }
+ spin_unlock(&block_group->lock);
+
+ return inode;
+}
+
+int create_free_space_inode(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path)
+{
+ struct btrfs_key key;
+ struct btrfs_disk_key disk_key;
+ struct btrfs_free_space_header *header;
+ struct btrfs_inode_item *inode_item;
+ struct extent_buffer *leaf;
+ u64 objectid;
+ int ret;
+
+ ret = btrfs_find_free_objectid(trans, root, 0, &objectid);
+ if (ret < 0)
+ return ret;
+
+ ret = btrfs_insert_empty_inode(trans, root, path, objectid);
+ if (ret)
+ return ret;
+
+ leaf = path->nodes[0];
+ inode_item = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_inode_item);
+ btrfs_item_key(leaf, &disk_key, path->slots[0]);
+ memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
+ sizeof(*inode_item));
+ btrfs_set_inode_generation(leaf, inode_item, trans->transid);
+ btrfs_set_inode_size(leaf, inode_item, 0);
+ btrfs_set_inode_nbytes(leaf, inode_item, 0);
+ btrfs_set_inode_uid(leaf, inode_item, 0);
+ btrfs_set_inode_gid(leaf, inode_item, 0);
+ btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
+ btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
+ BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
+ btrfs_set_inode_nlink(leaf, inode_item, 1);
+ btrfs_set_inode_transid(leaf, inode_item, trans->transid);
+ btrfs_set_inode_block_group(leaf, inode_item,
+ block_group->key.objectid);
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(root, path);
+
+ key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+ key.offset = block_group->key.objectid;
+ key.type = 0;
+
+ ret = btrfs_insert_empty_item(trans, root, path, &key,
+ sizeof(struct btrfs_free_space_header));
+ if (ret < 0) {
+ btrfs_release_path(root, path);
+ return ret;
+ }
+ leaf = path->nodes[0];
+ header = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_free_space_header);
+ memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
+ btrfs_set_free_space_key(leaf, header, &disk_key);
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(root, path);
+
+ return 0;
+}
+
+int btrfs_truncate_free_space_cache(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct inode *inode)
+{
+ loff_t oldsize;
+ int ret = 0;
+
+ trans->block_rsv = root->orphan_block_rsv;
+ ret = btrfs_block_rsv_check(trans, root,
+ root->orphan_block_rsv,
+ 0, 5);
+ if (ret)
+ return ret;
+
+ oldsize = i_size_read(inode);
+ btrfs_i_size_write(inode, 0);
+ truncate_pagecache(inode, oldsize, 0);
+
+ /*
+ * We don't need an orphan item because truncating the free space cache
+ * will never be split across transactions.
+ */
+ ret = btrfs_truncate_inode_items(trans, root, inode,
+ 0, BTRFS_EXTENT_DATA_KEY);
+ if (ret) {
+ WARN_ON(1);
+ return ret;
+ }
+
+ return btrfs_update_inode(trans, root, inode);
+}
+
+static int readahead_cache(struct inode *inode)
+{
+ struct file_ra_state *ra;
+ unsigned long last_index;
+
+ ra = kzalloc(sizeof(*ra), GFP_NOFS);
+ if (!ra)
+ return -ENOMEM;
+
+ file_ra_state_init(ra, inode->i_mapping);
+ last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+
+ page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
+
+ kfree(ra);
+
+ return 0;
+}
+
+int load_free_space_cache(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group)
+{
+ struct btrfs_root *root = fs_info->tree_root;
+ struct inode *inode;
+ struct btrfs_free_space_header *header;
+ struct extent_buffer *leaf;
+ struct page *page;
+ struct btrfs_path *path;
+ u32 *checksums = NULL, *crc;
+ char *disk_crcs = NULL;
+ struct btrfs_key key;
+ struct list_head bitmaps;
+ u64 num_entries;
+ u64 num_bitmaps;
+ u64 generation;
+ u32 cur_crc = ~(u32)0;
+ pgoff_t index = 0;
+ unsigned long first_page_offset;
+ int num_checksums;
+ int ret = 0;
+
+ /*
+ * If we're unmounting then just return, since this does a search on the
+ * normal root and not the commit root and we could deadlock.
+ */
+ smp_mb();
+ if (fs_info->closing)
+ return 0;
+
+ /*
+ * If this block group has been marked to be cleared for one reason or
+ * another then we can't trust the on disk cache, so just return.
+ */
+ spin_lock(&block_group->lock);
+ if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
+ spin_unlock(&block_group->lock);
+ return 0;
+ }
+ spin_unlock(&block_group->lock);
+
+ INIT_LIST_HEAD(&bitmaps);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return 0;
+
+ inode = lookup_free_space_inode(root, block_group, path);
+ if (IS_ERR(inode)) {
+ btrfs_free_path(path);
+ return 0;
+ }
+
+ /* Nothing in the space cache, goodbye */
+ if (!i_size_read(inode)) {
+ btrfs_free_path(path);
+ goto out;
+ }
+
+ key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+ key.offset = block_group->key.objectid;
+ key.type = 0;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret) {
+ btrfs_free_path(path);
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ header = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_free_space_header);
+ num_entries = btrfs_free_space_entries(leaf, header);
+ num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
+ generation = btrfs_free_space_generation(leaf, header);
+ btrfs_free_path(path);
+
+ if (BTRFS_I(inode)->generation != generation) {
+ printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
+ " not match free space cache generation (%llu) for "
+ "block group %llu\n",
+ (unsigned long long)BTRFS_I(inode)->generation,
+ (unsigned long long)generation,
+ (unsigned long long)block_group->key.objectid);
+ goto free_cache;
+ }
+
+ if (!num_entries)
+ goto out;
+
+ /* Setup everything for doing checksumming */
+ num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
+ checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+ if (!checksums)
+ goto out;
+ first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+ disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
+ if (!disk_crcs)
+ goto out;
+
+ ret = readahead_cache(inode);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+
+ while (1) {
+ struct btrfs_free_space_entry *entry;
+ struct btrfs_free_space *e;
+ void *addr;
+ unsigned long offset = 0;
+ unsigned long start_offset = 0;
+ int need_loop = 0;
+
+ if (!num_entries && !num_bitmaps)
+ break;
+
+ if (index == 0) {
+ start_offset = first_page_offset;
+ offset = start_offset;
+ }
+
+ page = grab_cache_page(inode->i_mapping, index);
+ if (!page) {
+ ret = 0;
+ goto free_cache;
+ }
+
+ if (!PageUptodate(page)) {
+ btrfs_readpage(NULL, page);
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ page_cache_release(page);
+ printk(KERN_ERR "btrfs: error reading free "
+ "space cache: %llu\n",
+ (unsigned long long)
+ block_group->key.objectid);
+ goto free_cache;
+ }
+ }
+ addr = kmap(page);
+
+ if (index == 0) {
+ u64 *gen;
+
+ memcpy(disk_crcs, addr, first_page_offset);
+ gen = addr + (sizeof(u32) * num_checksums);
+ if (*gen != BTRFS_I(inode)->generation) {
+ printk(KERN_ERR "btrfs: space cache generation"
+ " (%llu) does not match inode (%llu) "
+ "for block group %llu\n",
+ (unsigned long long)*gen,
+ (unsigned long long)
+ BTRFS_I(inode)->generation,
+ (unsigned long long)
+ block_group->key.objectid);
+ kunmap(page);
+ unlock_page(page);
+ page_cache_release(page);
+ goto free_cache;
+ }
+ crc = (u32 *)disk_crcs;
+ }
+ entry = addr + start_offset;
+
+ /* First lets check our crc before we do anything fun */
+ cur_crc = ~(u32)0;
+ cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
+ PAGE_CACHE_SIZE - start_offset);
+ btrfs_csum_final(cur_crc, (char *)&cur_crc);
+ if (cur_crc != *crc) {
+ printk(KERN_ERR "btrfs: crc mismatch for page %lu in "
+ "block group %llu\n", index,
+ (unsigned long long)block_group->key.objectid);
+ kunmap(page);
+ unlock_page(page);
+ page_cache_release(page);
+ goto free_cache;
+ }
+ crc++;
+
+ while (1) {
+ if (!num_entries)
+ break;
+
+ need_loop = 1;
+ e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
+ if (!e) {
+ kunmap(page);
+ unlock_page(page);
+ page_cache_release(page);
+ goto free_cache;
+ }
+
+ e->offset = le64_to_cpu(entry->offset);
+ e->bytes = le64_to_cpu(entry->bytes);
+ if (!e->bytes) {
+ kunmap(page);
+ kfree(e);
+ unlock_page(page);
+ page_cache_release(page);
+ goto free_cache;
+ }
+
+ if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
+ spin_lock(&block_group->tree_lock);
+ ret = link_free_space(block_group, e);
+ spin_unlock(&block_group->tree_lock);
+ BUG_ON(ret);
+ } else {
+ e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+ if (!e->bitmap) {
+ kunmap(page);
+ kfree(e);
+ unlock_page(page);
+ page_cache_release(page);
+ goto free_cache;
+ }
+ spin_lock(&block_group->tree_lock);
+ ret = link_free_space(block_group, e);
+ block_group->total_bitmaps++;
+ recalculate_thresholds(block_group);
+ spin_unlock(&block_group->tree_lock);
+ list_add_tail(&e->list, &bitmaps);
+ }
+
+ num_entries--;
+ offset += sizeof(struct btrfs_free_space_entry);
+ if (offset + sizeof(struct btrfs_free_space_entry) >=
+ PAGE_CACHE_SIZE)
+ break;
+ entry++;
+ }
+
+ /*
+ * We read an entry out of this page, we need to move on to the
+ * next page.
+ */
+ if (need_loop) {
+ kunmap(page);
+ goto next;
+ }
+
+ /*
+ * We add the bitmaps at the end of the entries in order that
+ * the bitmap entries are added to the cache.
+ */
+ e = list_entry(bitmaps.next, struct btrfs_free_space, list);
+ list_del_init(&e->list);
+ memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
+ kunmap(page);
+ num_bitmaps--;
+next:
+ unlock_page(page);
+ page_cache_release(page);
+ index++;
+ }
+
+ ret = 1;
+out:
+ kfree(checksums);
+ kfree(disk_crcs);
+ iput(inode);
+ return ret;
+
+free_cache:
+ /* This cache is bogus, make sure it gets cleared */
+ spin_lock(&block_group->lock);
+ block_group->disk_cache_state = BTRFS_DC_CLEAR;
+ spin_unlock(&block_group->lock);
+ btrfs_remove_free_space_cache(block_group);
+ goto out;
+}
+
+int btrfs_write_out_cache(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path)
+{
+ struct btrfs_free_space_header *header;
+ struct extent_buffer *leaf;
+ struct inode *inode;
+ struct rb_node *node;
+ struct list_head *pos, *n;
+ struct page *page;
+ struct extent_state *cached_state = NULL;
+ struct list_head bitmap_list;
+ struct btrfs_key key;
+ u64 bytes = 0;
+ u32 *crc, *checksums;
+ pgoff_t index = 0, last_index = 0;
+ unsigned long first_page_offset;
+ int num_checksums;
+ int entries = 0;
+ int bitmaps = 0;
+ int ret = 0;
+
+ root = root->fs_info->tree_root;
+
+ INIT_LIST_HEAD(&bitmap_list);
+
+ spin_lock(&block_group->lock);
+ if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
+ spin_unlock(&block_group->lock);
+ return 0;
+ }
+ spin_unlock(&block_group->lock);
+
+ inode = lookup_free_space_inode(root, block_group, path);
+ if (IS_ERR(inode))
+ return 0;
+
+ if (!i_size_read(inode)) {
+ iput(inode);
+ return 0;
+ }
+
+ node = rb_first(&block_group->free_space_offset);
+ if (!node) {
+ iput(inode);
+ return 0;
+ }
+
+ last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+ filemap_write_and_wait(inode->i_mapping);
+ btrfs_wait_ordered_range(inode, inode->i_size &
+ ~(root->sectorsize - 1), (u64)-1);
+
+ /* We need a checksum per page. */
+ num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
+ crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+ if (!crc) {
+ iput(inode);
+ return 0;
+ }
+
+ /* Since the first page has all of our checksums and our generation we
+ * need to calculate the offset into the page that we can start writing
+ * our entries.
+ */
+ first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+
+ /*
+ * Lock all pages first so we can lock the extent safely.
+ *
+ * NOTE: Because we hold the ref the entire time we're going to write to
+ * the page find_get_page should never fail, so we don't do a check
+ * after find_get_page at this point. Just putting this here so people
+ * know and don't freak out.
+ */
+ while (index <= last_index) {
+ page = grab_cache_page(inode->i_mapping, index);
+ if (!page) {
+ pgoff_t i = 0;
+
+ while (i < index) {
+ page = find_get_page(inode->i_mapping, i);
+ unlock_page(page);
+ page_cache_release(page);
+ page_cache_release(page);
+ i++;
+ }
+ goto out_free;
+ }
+ index++;
+ }
+
+ index = 0;
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
+ 0, &cached_state, GFP_NOFS);
+
+ /* Write out the extent entries */
+ do {
+ struct btrfs_free_space_entry *entry;
+ void *addr;
+ unsigned long offset = 0;
+ unsigned long start_offset = 0;
+
+ if (index == 0) {
+ start_offset = first_page_offset;
+ offset = start_offset;
+ }
+
+ page = find_get_page(inode->i_mapping, index);
+
+ addr = kmap(page);
+ entry = addr + start_offset;
+
+ memset(addr, 0, PAGE_CACHE_SIZE);
+ while (1) {
+ struct btrfs_free_space *e;
+
+ e = rb_entry(node, struct btrfs_free_space, offset_index);
+ entries++;
+
+ entry->offset = cpu_to_le64(e->offset);
+ entry->bytes = cpu_to_le64(e->bytes);
+ if (e->bitmap) {
+ entry->type = BTRFS_FREE_SPACE_BITMAP;
+ list_add_tail(&e->list, &bitmap_list);
+ bitmaps++;
+ } else {
+ entry->type = BTRFS_FREE_SPACE_EXTENT;
+ }
+ node = rb_next(node);
+ if (!node)
+ break;
+ offset += sizeof(struct btrfs_free_space_entry);
+ if (offset + sizeof(struct btrfs_free_space_entry) >=
+ PAGE_CACHE_SIZE)
+ break;
+ entry++;
+ }
+ *crc = ~(u32)0;
+ *crc = btrfs_csum_data(root, addr + start_offset, *crc,
+ PAGE_CACHE_SIZE - start_offset);
+ kunmap(page);
+
+ btrfs_csum_final(*crc, (char *)crc);
+ crc++;
+
+ bytes += PAGE_CACHE_SIZE;
+
+ ClearPageChecked(page);
+ set_page_extent_mapped(page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+
+ /*
+ * We need to release our reference we got for grab_cache_page,
+ * except for the first page which will hold our checksums, we
+ * do that below.
+ */
+ if (index != 0) {
+ unlock_page(page);
+ page_cache_release(page);
+ }
+
+ page_cache_release(page);
+
+ index++;
+ } while (node);
+
+ /* Write out the bitmaps */
+ list_for_each_safe(pos, n, &bitmap_list) {
+ void *addr;
+ struct btrfs_free_space *entry =
+ list_entry(pos, struct btrfs_free_space, list);
+
+ page = find_get_page(inode->i_mapping, index);
+
+ addr = kmap(page);
+ memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
+ *crc = ~(u32)0;
+ *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
+ kunmap(page);
+ btrfs_csum_final(*crc, (char *)crc);
+ crc++;
+ bytes += PAGE_CACHE_SIZE;
+
+ ClearPageChecked(page);
+ set_page_extent_mapped(page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+ page_cache_release(page);
+ list_del_init(&entry->list);
+ index++;
+ }
+
+ /* Zero out the rest of the pages just to make sure */
+ while (index <= last_index) {
+ void *addr;
+
+ page = find_get_page(inode->i_mapping, index);
+
+ addr = kmap(page);
+ memset(addr, 0, PAGE_CACHE_SIZE);
+ kunmap(page);
+ ClearPageChecked(page);
+ set_page_extent_mapped(page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+ page_cache_release(page);
+ bytes += PAGE_CACHE_SIZE;
+ index++;
+ }
+
+ btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
+
+ /* Write the checksums and trans id to the first page */
+ {
+ void *addr;
+ u64 *gen;
+
+ page = find_get_page(inode->i_mapping, 0);
+
+ addr = kmap(page);
+ memcpy(addr, checksums, sizeof(u32) * num_checksums);
+ gen = addr + (sizeof(u32) * num_checksums);
+ *gen = trans->transid;
+ kunmap(page);
+ ClearPageChecked(page);
+ set_page_extent_mapped(page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+ page_cache_release(page);
+ }
+ BTRFS_I(inode)->generation = trans->transid;
+
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
+ i_size_read(inode) - 1, &cached_state, GFP_NOFS);
+
+ filemap_write_and_wait(inode->i_mapping);
+
+ key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+ key.offset = block_group->key.objectid;
+ key.type = 0;
+
+ ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+ if (ret < 0) {
+ ret = 0;
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
+ goto out_free;
+ }
+ leaf = path->nodes[0];
+ if (ret > 0) {
+ struct btrfs_key found_key;
+ BUG_ON(!path->slots[0]);
+ path->slots[0]--;
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
+ found_key.offset != block_group->key.objectid) {
+ ret = 0;
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING, 0, 0, NULL,
+ GFP_NOFS);
+ btrfs_release_path(root, path);
+ goto out_free;
+ }
+ }
+ header = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_free_space_header);
+ btrfs_set_free_space_entries(leaf, header, entries);
+ btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
+ btrfs_set_free_space_generation(leaf, header, trans->transid);
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(root, path);
+
+ ret = 1;
+
+out_free:
+ if (ret == 0) {
+ invalidate_inode_pages2_range(inode->i_mapping, 0, index);
+ spin_lock(&block_group->lock);
+ block_group->disk_cache_state = BTRFS_DC_ERROR;
+ spin_unlock(&block_group->lock);
+ BTRFS_I(inode)->generation = 0;
+ }
+ kfree(checksums);
+ btrfs_update_inode(trans, root, inode);
+ iput(inode);
+ return ret;
+}
+
static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
u64 offset)
{
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 890a8e7..e49ca5c 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -27,6 +27,24 @@ struct btrfs_free_space {
struct list_head list;
};
+struct inode *lookup_free_space_inode(struct btrfs_root *root,
+ struct btrfs_block_group_cache
+ *block_group, struct btrfs_path *path);
+int create_free_space_inode(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path);
+
+int btrfs_truncate_free_space_cache(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct inode *inode);
+int load_free_space_cache(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group);
+int btrfs_write_out_cache(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path);
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
u64 bytenr, u64 size);
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 64f99cf..a0ff46a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -319,8 +319,6 @@ static noinline int compress_file_range(struct inode *inode,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
u64 num_bytes;
- u64 orig_start;
- u64 disk_num_bytes;
u64 blocksize = root->sectorsize;
u64 actual_end;
u64 isize = i_size_read(inode);
@@ -335,8 +333,6 @@ static noinline int compress_file_range(struct inode *inode,
int i;
int will_compress;
- orig_start = start;
-
actual_end = min_t(u64, isize, end + 1);
again:
will_compress = 0;
@@ -371,7 +367,6 @@ again:
total_compressed = min(total_compressed, max_uncompressed);
num_bytes = (end - start + blocksize) & ~(blocksize - 1);
num_bytes = max(blocksize, num_bytes);
- disk_num_bytes = num_bytes;
total_in = 0;
ret = 0;
@@ -467,7 +462,6 @@ again:
if (total_compressed >= total_in) {
will_compress = 0;
} else {
- disk_num_bytes = total_compressed;
num_bytes = total_in;
}
}
@@ -501,7 +495,7 @@ again:
add_async_extent(async_cow, start, num_bytes,
total_compressed, pages, nr_pages_ret);
- if (start + num_bytes < end && start + num_bytes < actual_end) {
+ if (start + num_bytes < end) {
start += num_bytes;
pages = NULL;
cond_resched();
@@ -757,20 +751,17 @@ static noinline int cow_file_range(struct inode *inode,
u64 disk_num_bytes;
u64 cur_alloc_size;
u64 blocksize = root->sectorsize;
- u64 actual_end;
- u64 isize = i_size_read(inode);
struct btrfs_key ins;
struct extent_map *em;
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
+ BUG_ON(root == root->fs_info->tree_root);
trans = btrfs_join_transaction(root, 1);
BUG_ON(!trans);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
- actual_end = min_t(u64, isize, end + 1);
-
num_bytes = (end - start + blocksize) & ~(blocksize - 1);
num_bytes = max(blocksize, num_bytes);
disk_num_bytes = num_bytes;
@@ -1035,10 +1026,16 @@ static noinline int run_delalloc_nocow(struct inode *inode,
int type;
int nocow;
int check_prev = 1;
+ bool nolock = false;
path = btrfs_alloc_path();
BUG_ON(!path);
- trans = btrfs_join_transaction(root, 1);
+ if (root == root->fs_info->tree_root) {
+ nolock = true;
+ trans = btrfs_join_transaction_nolock(root, 1);
+ } else {
+ trans = btrfs_join_transaction(root, 1);
+ }
BUG_ON(!trans);
cow_start = (u64)-1;
@@ -1211,8 +1208,13 @@ out_check:
BUG_ON(ret);
}
- ret = btrfs_end_transaction(trans, root);
- BUG_ON(ret);
+ if (nolock) {
+ ret = btrfs_end_transaction_nolock(trans, root);
+ BUG_ON(ret);
+ } else {
+ ret = btrfs_end_transaction(trans, root);
+ BUG_ON(ret);
+ }
btrfs_free_path(path);
return 0;
}
@@ -1289,6 +1291,8 @@ static int btrfs_set_bit_hook(struct inode *inode,
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
+ int do_list = (root->root_key.objectid !=
+ BTRFS_ROOT_TREE_OBJECTID);
if (*bits & EXTENT_FIRST_DELALLOC)
*bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1298,7 +1302,7 @@ static int btrfs_set_bit_hook(struct inode *inode,
spin_lock(&root->fs_info->delalloc_lock);
BTRFS_I(inode)->delalloc_bytes += len;
root->fs_info->delalloc_bytes += len;
- if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+ if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
&root->fs_info->delalloc_inodes);
}
@@ -1321,6 +1325,8 @@ static int btrfs_clear_bit_hook(struct inode *inode,
if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
+ int do_list = (root->root_key.objectid !=
+ BTRFS_ROOT_TREE_OBJECTID);
if (*bits & EXTENT_FIRST_DELALLOC)
*bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1330,14 +1336,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
if (*bits & EXTENT_DO_ACCOUNTING)
btrfs_delalloc_release_metadata(inode, len);
- if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+ if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
+ && do_list)
btrfs_free_reserved_data_space(inode, len);
spin_lock(&root->fs_info->delalloc_lock);
root->fs_info->delalloc_bytes -= len;
BTRFS_I(inode)->delalloc_bytes -= len;
- if (BTRFS_I(inode)->delalloc_bytes == 0 &&
+ if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_del_init(&BTRFS_I(inode)->delalloc_inodes);
}
@@ -1372,7 +1379,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
if (map_length < length + size)
return 1;
- return 0;
+ return ret;
}
/*
@@ -1426,7 +1433,10 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
- ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+ if (root == root->fs_info->tree_root)
+ ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
+ else
+ ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
BUG_ON(ret);
if (!(rw & REQ_WRITE)) {
@@ -1662,6 +1672,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
struct extent_state *cached_state = NULL;
int compressed = 0;
int ret;
+ bool nolock = false;
ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
end - start + 1);
@@ -1669,11 +1680,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
return 0;
BUG_ON(!ordered_extent);
+ nolock = (root == root->fs_info->tree_root);
+
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
BUG_ON(!list_empty(&ordered_extent->list));
ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
if (!ret) {
- trans = btrfs_join_transaction(root, 1);
+ if (nolock)
+ trans = btrfs_join_transaction_nolock(root, 1);
+ else
+ trans = btrfs_join_transaction(root, 1);
+ BUG_ON(!trans);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
ret = btrfs_update_inode(trans, root, inode);
@@ -1686,7 +1703,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ordered_extent->file_offset + ordered_extent->len - 1,
0, &cached_state, GFP_NOFS);
- trans = btrfs_join_transaction(root, 1);
+ if (nolock)
+ trans = btrfs_join_transaction_nolock(root, 1);
+ else
+ trans = btrfs_join_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -1700,6 +1720,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ordered_extent->len);
BUG_ON(ret);
} else {
+ BUG_ON(root == root->fs_info->tree_root);
ret = insert_reserved_file_extent(trans, inode,
ordered_extent->file_offset,
ordered_extent->start,
@@ -1724,9 +1745,15 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
out:
- btrfs_delalloc_release_metadata(inode, ordered_extent->len);
- if (trans)
- btrfs_end_transaction(trans, root);
+ if (nolock) {
+ if (trans)
+ btrfs_end_transaction_nolock(trans, root);
+ } else {
+ btrfs_delalloc_release_metadata(inode, ordered_extent->len);
+ if (trans)
+ btrfs_end_transaction(trans, root);
+ }
+
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
/* once for the tree */
@@ -2237,7 +2264,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
{
struct btrfs_path *path;
struct extent_buffer *leaf;
- struct btrfs_item *item;
struct btrfs_key key, found_key;
struct btrfs_trans_handle *trans;
struct inode *inode;
@@ -2275,7 +2301,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
/* pull out the item */
leaf = path->nodes[0];
- item = btrfs_item_nr(leaf, path->slots[0]);
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* make sure the item matches what we want */
@@ -2651,7 +2676,8 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
dir, index);
- BUG_ON(ret);
+ if (ret == -ENOENT)
+ ret = 0;
err:
btrfs_free_path(path);
if (ret)
@@ -2672,8 +2698,8 @@ static int check_path_shared(struct btrfs_root *root,
{
struct extent_buffer *eb;
int level;
- int ret;
u64 refs = 1;
+ int uninitialized_var(ret);
for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
if (!path->nodes[level])
@@ -2686,7 +2712,7 @@ static int check_path_shared(struct btrfs_root *root,
if (refs > 1)
return 1;
}
- return 0;
+ return ret; /* XXX callers? */
}
/*
@@ -3196,7 +3222,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
- if (root->ref_cows)
+ if (root->ref_cows || root == root->fs_info->tree_root)
btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
path = btrfs_alloc_path();
@@ -3344,7 +3370,8 @@ delete:
} else {
break;
}
- if (found_extent && root->ref_cows) {
+ if (found_extent && (root->ref_cows ||
+ root == root->fs_info->tree_root)) {
btrfs_set_path_blocking(path);
ret = btrfs_free_extent(trans, root, extent_start,
extent_num_bytes, 0,
@@ -3675,7 +3702,8 @@ void btrfs_evict_inode(struct inode *inode)
int ret;
truncate_inode_pages(&inode->i_data, 0);
- if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0)
+ if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
+ root == root->fs_info->tree_root))
goto no_delete;
if (is_bad_inode(inode)) {
@@ -3888,7 +3916,14 @@ static void inode_tree_del(struct inode *inode)
}
spin_unlock(&root->inode_lock);
- if (empty && btrfs_root_refs(&root->root_item) == 0) {
+ /*
+ * Free space cache has inodes in the tree root, but the tree root has a
+ * root_refs of 0, so this could end up dropping the tree root as a
+ * snapshot, so we need the extra !root->fs_info->tree_root check to
+ * make sure we don't drop it.
+ */
+ if (empty && btrfs_root_refs(&root->root_item) == 0 &&
+ root != root->fs_info->tree_root) {
synchronize_srcu(&root->fs_info->subvol_srcu);
spin_lock(&root->inode_lock);
empty = RB_EMPTY_ROOT(&root->inode_tree);
@@ -4049,7 +4084,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
int index;
int ret;
- dentry->d_op = &btrfs_dentry_operations;
+ d_set_d_op(dentry, &btrfs_dentry_operations);
if (dentry->d_name.len > BTRFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
@@ -4092,7 +4127,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
return inode;
}
-static int btrfs_dentry_delete(struct dentry *dentry)
+static int btrfs_dentry_delete(const struct dentry *dentry)
{
struct btrfs_root *root;
@@ -4282,14 +4317,24 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
int ret = 0;
+ bool nolock = false;
if (BTRFS_I(inode)->dummy_inode)
return 0;
+ smp_mb();
+ nolock = (root->fs_info->closing && root == root->fs_info->tree_root);
+
if (wbc->sync_mode == WB_SYNC_ALL) {
- trans = btrfs_join_transaction(root, 1);
+ if (nolock)
+ trans = btrfs_join_transaction_nolock(root, 1);
+ else
+ trans = btrfs_join_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
- ret = btrfs_commit_transaction(trans, root);
+ if (nolock)
+ ret = btrfs_end_transaction_nolock(trans, root);
+ else
+ ret = btrfs_commit_transaction(trans, root);
}
return ret;
}
@@ -4456,6 +4501,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
BTRFS_I(inode)->index_cnt = 2;
BTRFS_I(inode)->root = root;
BTRFS_I(inode)->generation = trans->transid;
+ inode->i_generation = BTRFS_I(inode)->generation;
btrfs_set_inode_space_info(root, inode);
if (mode & S_IFDIR)
@@ -4577,12 +4623,12 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
}
static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
- struct dentry *dentry, struct inode *inode,
- int backref, u64 index)
+ struct inode *dir, struct dentry *dentry,
+ struct inode *inode, int backref, u64 index)
{
- int err = btrfs_add_link(trans, dentry->d_parent->d_inode,
- inode, dentry->d_name.name,
- dentry->d_name.len, backref, index);
+ int err = btrfs_add_link(trans, dir, inode,
+ dentry->d_name.name, dentry->d_name.len,
+ backref, index);
if (!err) {
d_instantiate(dentry, inode);
return 0;
@@ -4623,8 +4669,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
btrfs_set_trans_block_group(trans, dir);
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len,
- dentry->d_parent->d_inode->i_ino, objectid,
+ dentry->d_name.len, dir->i_ino, objectid,
BTRFS_I(dir)->block_group, mode, &index);
err = PTR_ERR(inode);
if (IS_ERR(inode))
@@ -4637,7 +4682,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
}
btrfs_set_trans_block_group(trans, inode);
- err = btrfs_add_nondir(trans, dentry, inode, 0, index);
+ err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
if (err)
drop_inode = 1;
else {
@@ -4685,10 +4730,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
btrfs_set_trans_block_group(trans, dir);
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len,
- dentry->d_parent->d_inode->i_ino,
- objectid, BTRFS_I(dir)->block_group, mode,
- &index);
+ dentry->d_name.len, dir->i_ino, objectid,
+ BTRFS_I(dir)->block_group, mode, &index);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_unlock;
@@ -4700,7 +4743,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
}
btrfs_set_trans_block_group(trans, inode);
- err = btrfs_add_nondir(trans, dentry, inode, 0, index);
+ err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
if (err)
drop_inode = 1;
else {
@@ -4742,6 +4785,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
return -EPERM;
btrfs_inc_nlink(inode);
+ inode->i_ctime = CURRENT_TIME;
err = btrfs_set_inode_index(dir, &index);
if (err)
@@ -4760,15 +4804,17 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
btrfs_set_trans_block_group(trans, dir);
ihold(inode);
- err = btrfs_add_nondir(trans, dentry, inode, 1, index);
+ err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
if (err) {
drop_inode = 1;
} else {
+ struct dentry *parent = dget_parent(dentry);
btrfs_update_inode_block_group(trans, dir);
err = btrfs_update_inode(trans, root, inode);
BUG_ON(err);
- btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
+ btrfs_log_new_name(trans, inode, NULL, parent);
+ dput(parent);
}
nr = trans->blocks_used;
@@ -4808,8 +4854,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
btrfs_set_trans_block_group(trans, dir);
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len,
- dentry->d_parent->d_inode->i_ino, objectid,
+ dentry->d_name.len, dir->i_ino, objectid,
BTRFS_I(dir)->block_group, S_IFDIR | mode,
&index);
if (IS_ERR(inode)) {
@@ -4832,9 +4877,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
if (err)
goto out_fail;
- err = btrfs_add_link(trans, dentry->d_parent->d_inode,
- inode, dentry->d_name.name,
- dentry->d_name.len, 0, index);
+ err = btrfs_add_link(trans, dir, inode, dentry->d_name.name,
+ dentry->d_name.len, 0, index);
if (err)
goto out_fail;
@@ -5490,13 +5534,21 @@ struct btrfs_dio_private {
u64 bytes;
u32 *csums;
void *private;
+
+ /* number of bios pending for this dio */
+ atomic_t pending_bios;
+
+ /* IO errors */
+ int errors;
+
+ struct bio *orig_bio;
};
static void btrfs_endio_direct_read(struct bio *bio, int err)
{
+ struct btrfs_dio_private *dip = bio->bi_private;
struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
struct bio_vec *bvec = bio->bi_io_vec;
- struct btrfs_dio_private *dip = bio->bi_private;
struct inode *inode = dip->inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 start;
@@ -5550,15 +5602,18 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
struct btrfs_trans_handle *trans;
struct btrfs_ordered_extent *ordered = NULL;
struct extent_state *cached_state = NULL;
+ u64 ordered_offset = dip->logical_offset;
+ u64 ordered_bytes = dip->bytes;
int ret;
if (err)
goto out_done;
-
- ret = btrfs_dec_test_ordered_pending(inode, &ordered,
- dip->logical_offset, dip->bytes);
+again:
+ ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
+ &ordered_offset,
+ ordered_bytes);
if (!ret)
- goto out_done;
+ goto out_test;
BUG_ON(!ordered);
@@ -5618,8 +5673,20 @@ out_unlock:
out:
btrfs_delalloc_release_metadata(inode, ordered->len);
btrfs_end_transaction(trans, root);
+ ordered_offset = ordered->file_offset + ordered->len;
btrfs_put_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
+
+out_test:
+ /*
+ * our bio might span multiple ordered extents. If we haven't
+ * completed the accounting for the whole dio, go back and try again
+ */
+ if (ordered_offset < dip->logical_offset + dip->bytes) {
+ ordered_bytes = dip->logical_offset + dip->bytes -
+ ordered_offset;
+ goto again;
+ }
out_done:
bio->bi_private = dip->private;
@@ -5639,13 +5706,182 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
return 0;
}
+static void btrfs_end_dio_bio(struct bio *bio, int err)
+{
+ struct btrfs_dio_private *dip = bio->bi_private;
+
+ if (err) {
+ printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu "
+ "sector %#Lx len %u err no %d\n",
+ dip->inode->i_ino, bio->bi_rw,
+ (unsigned long long)bio->bi_sector, bio->bi_size, err);
+ dip->errors = 1;
+
+ /*
+ * before atomic variable goto zero, we must make sure
+ * dip->errors is perceived to be set.
+ */
+ smp_mb__before_atomic_dec();
+ }
+
+ /* if there are more bios still pending for this dio, just exit */
+ if (!atomic_dec_and_test(&dip->pending_bios))
+ goto out;
+
+ if (dip->errors)
+ bio_io_error(dip->orig_bio);
+ else {
+ set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags);
+ bio_endio(dip->orig_bio, 0);
+ }
+out:
+ bio_put(bio);
+}
+
+static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
+ u64 first_sector, gfp_t gfp_flags)
+{
+ int nr_vecs = bio_get_nr_vecs(bdev);
+ return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
+}
+
+static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
+ int rw, u64 file_offset, int skip_sum,
+ u32 *csums)
+{
+ int write = rw & REQ_WRITE;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ int ret;
+
+ bio_get(bio);
+ ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+ if (ret)
+ goto err;
+
+ if (write && !skip_sum) {
+ ret = btrfs_wq_submit_bio(root->fs_info,
+ inode, rw, bio, 0, 0,
+ file_offset,
+ __btrfs_submit_bio_start_direct_io,
+ __btrfs_submit_bio_done);
+ goto err;
+ } else if (!skip_sum)
+ btrfs_lookup_bio_sums_dio(root, inode, bio,
+ file_offset, csums);
+
+ ret = btrfs_map_bio(root, rw, bio, 0, 1);
+err:
+ bio_put(bio);
+ return ret;
+}
+
+static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
+ int skip_sum)
+{
+ struct inode *inode = dip->inode;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
+ struct bio *bio;
+ struct bio *orig_bio = dip->orig_bio;
+ struct bio_vec *bvec = orig_bio->bi_io_vec;
+ u64 start_sector = orig_bio->bi_sector;
+ u64 file_offset = dip->logical_offset;
+ u64 submit_len = 0;
+ u64 map_length;
+ int nr_pages = 0;
+ u32 *csums = dip->csums;
+ int ret = 0;
+
+ bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
+ if (!bio)
+ return -ENOMEM;
+ bio->bi_private = dip;
+ bio->bi_end_io = btrfs_end_dio_bio;
+ atomic_inc(&dip->pending_bios);
+
+ map_length = orig_bio->bi_size;
+ ret = btrfs_map_block(map_tree, READ, start_sector << 9,
+ &map_length, NULL, 0);
+ if (ret) {
+ bio_put(bio);
+ return -EIO;
+ }
+
+ while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
+ if (unlikely(map_length < submit_len + bvec->bv_len ||
+ bio_add_page(bio, bvec->bv_page, bvec->bv_len,
+ bvec->bv_offset) < bvec->bv_len)) {
+ /*
+ * inc the count before we submit the bio so
+ * we know the end IO handler won't happen before
+ * we inc the count. Otherwise, the dip might get freed
+ * before we're done setting it up
+ */
+ atomic_inc(&dip->pending_bios);
+ ret = __btrfs_submit_dio_bio(bio, inode, rw,
+ file_offset, skip_sum,
+ csums);
+ if (ret) {
+ bio_put(bio);
+ atomic_dec(&dip->pending_bios);
+ goto out_err;
+ }
+
+ if (!skip_sum)
+ csums = csums + nr_pages;
+ start_sector += submit_len >> 9;
+ file_offset += submit_len;
+
+ submit_len = 0;
+ nr_pages = 0;
+
+ bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
+ start_sector, GFP_NOFS);
+ if (!bio)
+ goto out_err;
+ bio->bi_private = dip;
+ bio->bi_end_io = btrfs_end_dio_bio;
+
+ map_length = orig_bio->bi_size;
+ ret = btrfs_map_block(map_tree, READ, start_sector << 9,
+ &map_length, NULL, 0);
+ if (ret) {
+ bio_put(bio);
+ goto out_err;
+ }
+ } else {
+ submit_len += bvec->bv_len;
+ nr_pages ++;
+ bvec++;
+ }
+ }
+
+ ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
+ csums);
+ if (!ret)
+ return 0;
+
+ bio_put(bio);
+out_err:
+ dip->errors = 1;
+ /*
+ * before atomic variable goto zero, we must
+ * make sure dip->errors is perceived to be set.
+ */
+ smp_mb__before_atomic_dec();
+ if (atomic_dec_and_test(&dip->pending_bios))
+ bio_io_error(dip->orig_bio);
+
+ /* bio_end_io() will handle error, so we needn't return it */
+ return 0;
+}
+
static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
loff_t file_offset)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_dio_private *dip;
struct bio_vec *bvec = bio->bi_io_vec;
- u64 start;
int skip_sum;
int write = rw & REQ_WRITE;
int ret = 0;
@@ -5671,7 +5907,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
dip->inode = inode;
dip->logical_offset = file_offset;
- start = dip->logical_offset;
dip->bytes = 0;
do {
dip->bytes += bvec->bv_len;
@@ -5680,36 +5915,18 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
dip->disk_bytenr = (u64)bio->bi_sector << 9;
bio->bi_private = dip;
+ dip->errors = 0;
+ dip->orig_bio = bio;
+ atomic_set(&dip->pending_bios, 0);
if (write)
bio->bi_end_io = btrfs_endio_direct_write;
else
bio->bi_end_io = btrfs_endio_direct_read;
- ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
- if (ret)
- goto out_err;
-
- if (write && !skip_sum) {
- ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
- inode, rw, bio, 0, 0,
- dip->logical_offset,
- __btrfs_submit_bio_start_direct_io,
- __btrfs_submit_bio_done);
- if (ret)
- goto out_err;
+ ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
+ if (!ret)
return;
- } else if (!skip_sum)
- btrfs_lookup_bio_sums_dio(root, inode, bio,
- dip->logical_offset, dip->csums);
-
- ret = btrfs_map_bio(root, rw, bio, 0, 1);
- if (ret)
- goto out_err;
- return;
-out_err:
- kfree(dip->csums);
- kfree(dip);
free_ordered:
/*
* If this is a write, we need to clean up the reserved space and kill
@@ -5717,8 +5934,7 @@ free_ordered:
*/
if (write) {
struct btrfs_ordered_extent *ordered;
- ordered = btrfs_lookup_ordered_extent(inode,
- dip->logical_offset);
+ ordered = btrfs_lookup_ordered_extent(inode, file_offset);
if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
btrfs_free_reserved_extent(root, ordered->start,
@@ -6279,6 +6495,13 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
return inode;
}
+static void btrfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+}
+
void btrfs_destroy_inode(struct inode *inode)
{
struct btrfs_ordered_extent *ordered;
@@ -6308,6 +6531,21 @@ void btrfs_destroy_inode(struct inode *inode)
spin_unlock(&root->fs_info->ordered_extent_lock);
}
+ if (root == root->fs_info->tree_root) {
+ struct btrfs_block_group_cache *block_group;
+
+ block_group = btrfs_lookup_block_group(root->fs_info,
+ BTRFS_I(inode)->block_group);
+ if (block_group && block_group->inode == inode) {
+ spin_lock(&block_group->lock);
+ block_group->inode = NULL;
+ spin_unlock(&block_group->lock);
+ btrfs_put_block_group(block_group);
+ } else if (block_group) {
+ btrfs_put_block_group(block_group);
+ }
+ }
+
spin_lock(&root->orphan_lock);
if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
@@ -6333,14 +6571,15 @@ void btrfs_destroy_inode(struct inode *inode)
inode_tree_del(inode);
btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
free:
- kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+ call_rcu(&inode->i_rcu, btrfs_i_callback);
}
int btrfs_drop_inode(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- if (btrfs_root_refs(&root->root_item) == 0)
+ if (btrfs_root_refs(&root->root_item) == 0 &&
+ root != root->fs_info->tree_root)
return 1;
else
return generic_drop_inode(inode);
@@ -6548,8 +6787,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
BUG_ON(ret);
if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
- btrfs_log_new_name(trans, old_inode, old_dir,
- new_dentry->d_parent);
+ struct dentry *parent = dget_parent(new_dentry);
+ btrfs_log_new_name(trans, old_inode, old_dir, parent);
+ dput(parent);
btrfs_end_log_trans(root);
}
out_fail:
@@ -6609,7 +6849,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
return 0;
}
-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
+int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
+ int sync)
{
struct btrfs_inode *binode;
struct inode *inode = NULL;
@@ -6631,7 +6872,26 @@ int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
spin_unlock(&root->fs_info->delalloc_lock);
if (inode) {
- write_inode_now(inode, 0);
+ if (sync) {
+ filemap_write_and_wait(inode->i_mapping);
+ /*
+ * We have to do this because compression doesn't
+ * actually set PG_writeback until it submits the pages
+ * for IO, which happens in an async thread, so we could
+ * race and not actually wait for any writeback pages
+ * because they've not been submitted yet. Technically
+ * this could still be the case for the ordered stuff
+ * since the async thread may not have started to do its
+ * work yet. If this becomes the case then we need to
+ * figure out a way to make sure that in writepage we
+ * wait for any async pages to be submitted before
+ * returning so that fdatawait does what its supposed to
+ * do.
+ */
+ btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ } else {
+ filemap_flush(inode->i_mapping);
+ }
if (delay_iput)
btrfs_add_delayed_iput(inode);
else
@@ -6679,8 +6939,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
btrfs_set_trans_block_group(trans, dir);
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len,
- dentry->d_parent->d_inode->i_ino, objectid,
+ dentry->d_name.len, dir->i_ino, objectid,
BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
&index);
err = PTR_ERR(inode);
@@ -6694,7 +6953,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
}
btrfs_set_trans_block_group(trans, inode);
- err = btrfs_add_nondir(trans, dentry, inode, 0, index);
+ err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
if (err)
drop_inode = 1;
else {
@@ -6757,27 +7016,34 @@ out_unlock:
return err;
}
-int btrfs_prealloc_file_range(struct inode *inode, int mode,
- u64 start, u64 num_bytes, u64 min_size,
- loff_t actual_len, u64 *alloc_hint)
+static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
+ u64 start, u64 num_bytes, u64 min_size,
+ loff_t actual_len, u64 *alloc_hint,
+ struct btrfs_trans_handle *trans)
{
- struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key ins;
u64 cur_offset = start;
+ u64 i_size;
int ret = 0;
+ bool own_trans = true;
+ if (trans)
+ own_trans = false;
while (num_bytes > 0) {
- trans = btrfs_start_transaction(root, 3);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- break;
+ if (own_trans) {
+ trans = btrfs_start_transaction(root, 3);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ break;
+ }
}
ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
0, *alloc_hint, (u64)-1, &ins, 1);
if (ret) {
- btrfs_end_transaction(trans, root);
+ if (own_trans)
+ btrfs_end_transaction(trans, root);
break;
}
@@ -6800,21 +7066,40 @@ int btrfs_prealloc_file_range(struct inode *inode, int mode,
(actual_len > inode->i_size) &&
(cur_offset > inode->i_size)) {
if (cur_offset > actual_len)
- i_size_write(inode, actual_len);
+ i_size = actual_len;
else
- i_size_write(inode, cur_offset);
- i_size_write(inode, cur_offset);
- btrfs_ordered_update_i_size(inode, cur_offset, NULL);
+ i_size = cur_offset;
+ i_size_write(inode, i_size);
+ btrfs_ordered_update_i_size(inode, i_size, NULL);
}
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
- btrfs_end_transaction(trans, root);
+ if (own_trans)
+ btrfs_end_transaction(trans, root);
}
return ret;
}
+int btrfs_prealloc_file_range(struct inode *inode, int mode,
+ u64 start, u64 num_bytes, u64 min_size,
+ loff_t actual_len, u64 *alloc_hint)
+{
+ return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
+ min_size, actual_len, alloc_hint,
+ NULL);
+}
+
+int btrfs_prealloc_file_range_trans(struct inode *inode,
+ struct btrfs_trans_handle *trans, int mode,
+ u64 start, u64 num_bytes, u64 min_size,
+ loff_t actual_len, u64 *alloc_hint)
+{
+ return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
+ min_size, actual_len, alloc_hint, trans);
+}
+
static long btrfs_fallocate(struct inode *inode, int mode,
loff_t offset, loff_t len)
{
@@ -6839,6 +7124,10 @@ static long btrfs_fallocate(struct inode *inode, int mode,
btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
mutex_lock(&inode->i_mutex);
+ ret = inode_newsize_ok(inode, alloc_end);
+ if (ret)
+ goto out;
+
if (alloc_start > inode->i_size) {
ret = btrfs_cont_expand(inode, alloc_start);
if (ret)
@@ -6922,11 +7211,11 @@ static int btrfs_set_page_dirty(struct page *page)
return __set_page_dirty_nobuffers(page);
}
-static int btrfs_permission(struct inode *inode, int mask)
+static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
{
if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
return -EACCES;
- return generic_permission(inode, mask, btrfs_check_acl);
+ return generic_permission(inode, mask, flags, btrfs_check_acl);
}
static const struct inode_operations btrfs_dir_inode_operations = {
@@ -7035,6 +7324,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
+ .getattr = btrfs_getattr,
.permission = btrfs_permission,
.setxattr = btrfs_setxattr,
.getxattr = btrfs_getxattr,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9254b3d..f87552a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -224,7 +224,8 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
static noinline int create_subvol(struct btrfs_root *root,
struct dentry *dentry,
- char *name, int namelen)
+ char *name, int namelen,
+ u64 *async_transid)
{
struct btrfs_trans_handle *trans;
struct btrfs_key key;
@@ -232,7 +233,8 @@ static noinline int create_subvol(struct btrfs_root *root,
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
struct btrfs_root *new_root;
- struct inode *dir = dentry->d_parent->d_inode;
+ struct dentry *parent = dget_parent(dentry);
+ struct inode *dir;
int ret;
int err;
u64 objectid;
@@ -241,8 +243,13 @@ static noinline int create_subvol(struct btrfs_root *root,
ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root,
0, &objectid);
- if (ret)
+ if (ret) {
+ dput(parent);
return ret;
+ }
+
+ dir = parent->d_inode;
+
/*
* 1 - inode item
* 2 - refs
@@ -250,8 +257,10 @@ static noinline int create_subvol(struct btrfs_root *root,
* 2 - dir items
*/
trans = btrfs_start_transaction(root, 6);
- if (IS_ERR(trans))
+ if (IS_ERR(trans)) {
+ dput(parent);
return PTR_ERR(trans);
+ }
leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
0, objectid, NULL, 0, 0, 0);
@@ -338,15 +347,23 @@ static noinline int create_subvol(struct btrfs_root *root,
d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
fail:
- err = btrfs_commit_transaction(trans, root);
+ dput(parent);
+ if (async_transid) {
+ *async_transid = trans->transid;
+ err = btrfs_commit_transaction_async(trans, root, 1);
+ } else {
+ err = btrfs_commit_transaction(trans, root);
+ }
if (err && !ret)
ret = err;
return ret;
}
-static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
+static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
+ char *name, int namelen, u64 *async_transid)
{
struct inode *inode;
+ struct dentry *parent;
struct btrfs_pending_snapshot *pending_snapshot;
struct btrfs_trans_handle *trans;
int ret;
@@ -373,7 +390,14 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
list_add(&pending_snapshot->list,
&trans->transaction->pending_snapshots);
- ret = btrfs_commit_transaction(trans, root->fs_info->extent_root);
+ if (async_transid) {
+ *async_transid = trans->transid;
+ ret = btrfs_commit_transaction_async(trans,
+ root->fs_info->extent_root, 1);
+ } else {
+ ret = btrfs_commit_transaction(trans,
+ root->fs_info->extent_root);
+ }
BUG_ON(ret);
ret = pending_snapshot->error;
@@ -382,7 +406,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
btrfs_orphan_cleanup(pending_snapshot->snap);
- inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
+ parent = dget_parent(dentry);
+ inode = btrfs_lookup_dentry(parent->d_inode, dentry);
+ dput(parent);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
goto fail;
@@ -395,6 +421,76 @@ fail:
return ret;
}
+/* copy of check_sticky in fs/namei.c()
+* It's inline, so penalty for filesystems that don't use sticky bit is
+* minimal.
+*/
+static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
+{
+ uid_t fsuid = current_fsuid();
+
+ if (!(dir->i_mode & S_ISVTX))
+ return 0;
+ if (inode->i_uid == fsuid)
+ return 0;
+ if (dir->i_uid == fsuid)
+ return 0;
+ return !capable(CAP_FOWNER);
+}
+
+/* copy of may_delete in fs/namei.c()
+ * Check whether we can remove a link victim from directory dir, check
+ * whether the type of victim is right.
+ * 1. We can't do it if dir is read-only (done in permission())
+ * 2. We should have write and exec permissions on dir
+ * 3. We can't remove anything from append-only dir
+ * 4. We can't do anything with immutable dir (done in permission())
+ * 5. If the sticky bit on dir is set we should either
+ * a. be owner of dir, or
+ * b. be owner of victim, or
+ * c. have CAP_FOWNER capability
+ * 6. If the victim is append-only or immutable we can't do antyhing with
+ * links pointing to it.
+ * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
+ * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
+ * 9. We can't remove a root or mountpoint.
+ * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
+ * nfs_async_unlink().
+ */
+
+static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir)
+{
+ int error;
+
+ if (!victim->d_inode)
+ return -ENOENT;
+
+ BUG_ON(victim->d_parent->d_inode != dir);
+ audit_inode_child(victim, dir);
+
+ error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+ if (error)
+ return error;
+ if (IS_APPEND(dir))
+ return -EPERM;
+ if (btrfs_check_sticky(dir, victim->d_inode)||
+ IS_APPEND(victim->d_inode)||
+ IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
+ return -EPERM;
+ if (isdir) {
+ if (!S_ISDIR(victim->d_inode->i_mode))
+ return -ENOTDIR;
+ if (IS_ROOT(victim))
+ return -EBUSY;
+ } else if (S_ISDIR(victim->d_inode->i_mode))
+ return -EISDIR;
+ if (IS_DEADDIR(dir))
+ return -ENOENT;
+ if (victim->d_flags & DCACHE_NFSFS_RENAMED)
+ return -EBUSY;
+ return 0;
+}
+
/* copy of may_create in fs/namei.c() */
static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
{
@@ -412,7 +508,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
*/
static noinline int btrfs_mksubvol(struct path *parent,
char *name, int namelen,
- struct btrfs_root *snap_src)
+ struct btrfs_root *snap_src,
+ u64 *async_transid)
{
struct inode *dir = parent->dentry->d_inode;
struct dentry *dentry;
@@ -443,10 +540,11 @@ static noinline int btrfs_mksubvol(struct path *parent,
goto out_up_read;
if (snap_src) {
- error = create_snapshot(snap_src, dentry);
+ error = create_snapshot(snap_src, dentry,
+ name, namelen, async_transid);
} else {
error = create_subvol(BTRFS_I(dir)->root, dentry,
- name, namelen);
+ name, namelen, async_transid);
}
if (!error)
fsnotify_mkdir(dir, dentry);
@@ -708,7 +806,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
char *sizestr;
char *devstr = NULL;
int ret = 0;
- int namelen;
int mod = 0;
if (root->fs_info->sb->s_flags & MS_RDONLY)
@@ -722,7 +819,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
return PTR_ERR(vol_args);
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- namelen = strlen(vol_args->name);
mutex_lock(&root->fs_info->volume_mutex);
sizestr = vol_args->name;
@@ -801,11 +897,13 @@ out_unlock:
return ret;
}
-static noinline int btrfs_ioctl_snap_create(struct file *file,
- void __user *arg, int subvol)
+static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
+ char *name,
+ unsigned long fd,
+ int subvol,
+ u64 *transid)
{
struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
- struct btrfs_ioctl_vol_args *vol_args;
struct file *src_file;
int namelen;
int ret = 0;
@@ -813,23 +911,18 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
- vol_args = memdup_user(arg, sizeof(*vol_args));
- if (IS_ERR(vol_args))
- return PTR_ERR(vol_args);
-
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- namelen = strlen(vol_args->name);
- if (strchr(vol_args->name, '/')) {
+ namelen = strlen(name);
+ if (strchr(name, '/')) {
ret = -EINVAL;
goto out;
}
if (subvol) {
- ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
- NULL);
+ ret = btrfs_mksubvol(&file->f_path, name, namelen,
+ NULL, transid);
} else {
struct inode *src_inode;
- src_file = fget(vol_args->fd);
+ src_file = fget(fd);
if (!src_file) {
ret = -EINVAL;
goto out;
@@ -843,12 +936,68 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
fput(src_file);
goto out;
}
- ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
- BTRFS_I(src_inode)->root);
+ ret = btrfs_mksubvol(&file->f_path, name, namelen,
+ BTRFS_I(src_inode)->root,
+ transid);
fput(src_file);
}
out:
+ return ret;
+}
+
+static noinline int btrfs_ioctl_snap_create(struct file *file,
+ void __user *arg, int subvol,
+ int v2)
+{
+ struct btrfs_ioctl_vol_args *vol_args = NULL;
+ struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL;
+ char *name;
+ u64 fd;
+ int ret;
+
+ if (v2) {
+ u64 transid = 0;
+ u64 *ptr = NULL;
+
+ vol_args_v2 = memdup_user(arg, sizeof(*vol_args_v2));
+ if (IS_ERR(vol_args_v2))
+ return PTR_ERR(vol_args_v2);
+
+ if (vol_args_v2->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ name = vol_args_v2->name;
+ fd = vol_args_v2->fd;
+ vol_args_v2->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
+
+ if (vol_args_v2->flags & BTRFS_SUBVOL_CREATE_ASYNC)
+ ptr = &transid;
+
+ ret = btrfs_ioctl_snap_create_transid(file, name, fd,
+ subvol, ptr);
+
+ if (ret == 0 && ptr &&
+ copy_to_user(arg +
+ offsetof(struct btrfs_ioctl_vol_args_v2,
+ transid), ptr, sizeof(*ptr)))
+ ret = -EFAULT;
+ } else {
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
+ name = vol_args->name;
+ fd = vol_args->fd;
+ vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+
+ ret = btrfs_ioctl_snap_create_transid(file, name, fd,
+ subvol, NULL);
+ }
+out:
kfree(vol_args);
+ kfree(vol_args_v2);
+
return ret;
}
@@ -1073,14 +1222,10 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- args = kmalloc(sizeof(*args), GFP_KERNEL);
- if (!args)
- return -ENOMEM;
+ args = memdup_user(argp, sizeof(*args));
+ if (IS_ERR(args))
+ return PTR_ERR(args);
- if (copy_from_user(args, argp, sizeof(*args))) {
- kfree(args);
- return -EFAULT;
- }
inode = fdentry(file)->d_inode;
ret = search_ioctl(inode, args);
if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
@@ -1188,14 +1333,10 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- args = kmalloc(sizeof(*args), GFP_KERNEL);
- if (!args)
- return -ENOMEM;
+ args = memdup_user(argp, sizeof(*args));
+ if (IS_ERR(args))
+ return PTR_ERR(args);
- if (copy_from_user(args, argp, sizeof(*args))) {
- kfree(args);
- return -EFAULT;
- }
inode = fdentry(file)->d_inode;
if (args->treeid == 0)
@@ -1227,9 +1368,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
int ret;
int err = 0;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
@@ -1259,13 +1397,51 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
}
inode = dentry->d_inode;
+ dest = BTRFS_I(inode)->root;
+ if (!capable(CAP_SYS_ADMIN)){
+ /*
+ * Regular user. Only allow this with a special mount
+ * option, when the user has write+exec access to the
+ * subvol root, and when rmdir(2) would have been
+ * allowed.
+ *
+ * Note that this is _not_ check that the subvol is
+ * empty or doesn't contain data that we wouldn't
+ * otherwise be able to delete.
+ *
+ * Users who want to delete empty subvols should try
+ * rmdir(2).
+ */
+ err = -EPERM;
+ if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
+ goto out_dput;
+
+ /*
+ * Do not allow deletion if the parent dir is the same
+ * as the dir to be deleted. That means the ioctl
+ * must be called on the dentry referencing the root
+ * of the subvol, not a random directory contained
+ * within it.
+ */
+ err = -EINVAL;
+ if (root == dest)
+ goto out_dput;
+
+ err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
+ if (err)
+ goto out_dput;
+
+ /* check if subvolume may be deleted by a non-root user */
+ err = btrfs_may_delete(dir, dentry, 1);
+ if (err)
+ goto out_dput;
+ }
+
if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
err = -EINVAL;
goto out_dput;
}
- dest = BTRFS_I(inode)->root;
-
mutex_lock(&inode->i_mutex);
err = d_invalidate(dentry);
if (err)
@@ -1304,7 +1480,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
BUG_ON(ret);
}
- ret = btrfs_commit_transaction(trans, root);
+ ret = btrfs_end_transaction(trans, root);
BUG_ON(ret);
inode->i_flags |= S_DEAD;
out_up_write:
@@ -1502,11 +1678,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
path->reada = 2;
if (inode < src) {
- mutex_lock(&inode->i_mutex);
- mutex_lock(&src->i_mutex);
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
} else {
- mutex_lock(&src->i_mutex);
- mutex_lock(&inode->i_mutex);
+ mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
}
/* determine range to clone */
@@ -1517,12 +1693,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
olen = len = src->i_size - off;
/* if we extend to eof, continue to block boundary */
if (off + len == src->i_size)
- len = ((src->i_size + bs-1) & ~(bs-1))
- - off;
+ len = ALIGN(src->i_size, bs) - off;
/* verify the end result is block aligned */
- if ((off & (bs-1)) ||
- ((off + len) & (bs-1)))
+ if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
+ !IS_ALIGNED(destoff, bs))
goto out_unlock;
/* do any pending delalloc/csum calc on src, one way or
@@ -1530,13 +1705,15 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
while (1) {
struct btrfs_ordered_extent *ordered;
lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
- ordered = btrfs_lookup_first_ordered_extent(inode, off+len);
- if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered)
+ ordered = btrfs_lookup_first_ordered_extent(src, off+len);
+ if (!ordered &&
+ !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len,
+ EXTENT_DELALLOC, 0, NULL))
break;
unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
if (ordered)
btrfs_put_ordered_extent(ordered);
- btrfs_wait_ordered_range(src, off, off+len);
+ btrfs_wait_ordered_range(src, off, len);
}
/* clone data */
@@ -1605,7 +1782,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
}
btrfs_release_path(root, path);
- if (key.offset + datal < off ||
+ if (key.offset + datal <= off ||
key.offset >= off+len)
goto next;
@@ -1720,8 +1897,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
* but shouldn't round up the file size
*/
endoff = new_key.offset + datal;
- if (endoff > off+olen)
- endoff = off+olen;
+ if (endoff > destoff+olen)
+ endoff = destoff+olen;
if (endoff > inode->i_size)
btrfs_i_size_write(inode, endoff);
@@ -1879,6 +2056,22 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
return 0;
}
+static void get_block_group_info(struct list_head *groups_list,
+ struct btrfs_ioctl_space_info *space)
+{
+ struct btrfs_block_group_cache *block_group;
+
+ space->total_bytes = 0;
+ space->used_bytes = 0;
+ space->flags = 0;
+ list_for_each_entry(block_group, groups_list, list) {
+ space->flags = block_group->flags;
+ space->total_bytes += block_group->key.offset;
+ space->used_bytes +=
+ btrfs_block_group_used(&block_group->item);
+ }
+}
+
long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
{
struct btrfs_ioctl_space_args space_args;
@@ -1887,27 +2080,56 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
struct btrfs_ioctl_space_info *dest_orig;
struct btrfs_ioctl_space_info *user_dest;
struct btrfs_space_info *info;
+ u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
+ BTRFS_BLOCK_GROUP_SYSTEM,
+ BTRFS_BLOCK_GROUP_METADATA,
+ BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
+ int num_types = 4;
int alloc_size;
int ret = 0;
int slot_count = 0;
+ int i, c;
if (copy_from_user(&space_args,
(struct btrfs_ioctl_space_args __user *)arg,
sizeof(space_args)))
return -EFAULT;
- /* first we count slots */
- rcu_read_lock();
- list_for_each_entry_rcu(info, &root->fs_info->space_info, list)
- slot_count++;
- rcu_read_unlock();
+ for (i = 0; i < num_types; i++) {
+ struct btrfs_space_info *tmp;
+
+ info = NULL;
+ rcu_read_lock();
+ list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
+ list) {
+ if (tmp->flags == types[i]) {
+ info = tmp;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ if (!info)
+ continue;
+
+ down_read(&info->groups_sem);
+ for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
+ if (!list_empty(&info->block_groups[c]))
+ slot_count++;
+ }
+ up_read(&info->groups_sem);
+ }
/* space_slots == 0 means they are asking for a count */
if (space_args.space_slots == 0) {
space_args.total_spaces = slot_count;
goto out;
}
+
+ slot_count = min_t(int, space_args.space_slots, slot_count);
+
alloc_size = sizeof(*dest) * slot_count;
+
/* we generally have at most 6 or so space infos, one for each raid
* level. So, a whole page should be more than enough for everyone
*/
@@ -1921,27 +2143,34 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
dest_orig = dest;
/* now we have a buffer to copy into */
- rcu_read_lock();
- list_for_each_entry_rcu(info, &root->fs_info->space_info, list) {
- /* make sure we don't copy more than we allocated
- * in our buffer
- */
- if (slot_count == 0)
- break;
- slot_count--;
-
- /* make sure userland has enough room in their buffer */
- if (space_args.total_spaces >= space_args.space_slots)
- break;
+ for (i = 0; i < num_types; i++) {
+ struct btrfs_space_info *tmp;
+
+ info = NULL;
+ rcu_read_lock();
+ list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
+ list) {
+ if (tmp->flags == types[i]) {
+ info = tmp;
+ break;
+ }
+ }
+ rcu_read_unlock();
- space.flags = info->flags;
- space.total_bytes = info->total_bytes;
- space.used_bytes = info->bytes_used;
- memcpy(dest, &space, sizeof(space));
- dest++;
- space_args.total_spaces++;
+ if (!info)
+ continue;
+ down_read(&info->groups_sem);
+ for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
+ if (!list_empty(&info->block_groups[c])) {
+ get_block_group_info(&info->block_groups[c],
+ &space);
+ memcpy(dest, &space, sizeof(space));
+ dest++;
+ space_args.total_spaces++;
+ }
+ }
+ up_read(&info->groups_sem);
}
- rcu_read_unlock();
user_dest = (struct btrfs_ioctl_space_info *)
(arg + sizeof(struct btrfs_ioctl_space_args));
@@ -1984,6 +2213,36 @@ long btrfs_ioctl_trans_end(struct file *file)
return 0;
}
+static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp)
+{
+ struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
+ struct btrfs_trans_handle *trans;
+ u64 transid;
+
+ trans = btrfs_start_transaction(root, 0);
+ transid = trans->transid;
+ btrfs_commit_transaction_async(trans, root, 0);
+
+ if (argp)
+ if (copy_to_user(argp, &transid, sizeof(transid)))
+ return -EFAULT;
+ return 0;
+}
+
+static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
+{
+ struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
+ u64 transid;
+
+ if (argp) {
+ if (copy_from_user(&transid, argp, sizeof(transid)))
+ return -EFAULT;
+ } else {
+ transid = 0; /* current trans */
+ }
+ return btrfs_wait_for_commit(root, transid);
+}
+
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
@@ -1998,9 +2257,11 @@ long btrfs_ioctl(struct file *file, unsigned int
case FS_IOC_GETVERSION:
return btrfs_ioctl_getversion(file, argp);
case BTRFS_IOC_SNAP_CREATE:
- return btrfs_ioctl_snap_create(file, argp, 0);
+ return btrfs_ioctl_snap_create(file, argp, 0, 0);
+ case BTRFS_IOC_SNAP_CREATE_V2:
+ return btrfs_ioctl_snap_create(file, argp, 0, 1);
case BTRFS_IOC_SUBVOL_CREATE:
- return btrfs_ioctl_snap_create(file, argp, 1);
+ return btrfs_ioctl_snap_create(file, argp, 1, 0);
case BTRFS_IOC_SNAP_DESTROY:
return btrfs_ioctl_snap_destroy(file, argp);
case BTRFS_IOC_DEFAULT_SUBVOL:
@@ -2034,6 +2295,10 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_SYNC:
btrfs_sync_fs(file->f_dentry->d_sb, 1);
return 0;
+ case BTRFS_IOC_START_SYNC:
+ return btrfs_ioctl_start_sync(file, argp);
+ case BTRFS_IOC_WAIT_SYNC:
+ return btrfs_ioctl_wait_sync(file, argp);
}
return -ENOTTY;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 424694a..c344d12 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -22,14 +22,25 @@
#define BTRFS_IOCTL_MAGIC 0x94
#define BTRFS_VOL_NAME_MAX 255
-#define BTRFS_PATH_NAME_MAX 4087
/* this should be 4k */
+#define BTRFS_PATH_NAME_MAX 4087
struct btrfs_ioctl_vol_args {
__s64 fd;
char name[BTRFS_PATH_NAME_MAX + 1];
};
+#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
+
+#define BTRFS_SUBVOL_NAME_MAX 4039
+struct btrfs_ioctl_vol_args_v2 {
+ __s64 fd;
+ __u64 transid;
+ __u64 flags;
+ __u64 unused[4];
+ char name[BTRFS_SUBVOL_NAME_MAX + 1];
+};
+
#define BTRFS_INO_LOOKUP_PATH_MAX 4080
struct btrfs_ioctl_ino_lookup_args {
__u64 treeid;
@@ -178,4 +189,8 @@ struct btrfs_ioctl_space_args {
#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64)
#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
struct btrfs_ioctl_space_args)
+#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
+#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
+#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
+ struct btrfs_ioctl_vol_args_v2)
#endif
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e56c72b..ae7737e 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -250,6 +250,73 @@ int btrfs_add_ordered_sum(struct inode *inode,
/*
* this is used to account for finished IO across a given range
+ * of the file. The IO may span ordered extents. If
+ * a given ordered_extent is completely done, 1 is returned, otherwise
+ * 0.
+ *
+ * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
+ * to make sure this function only returns 1 once for a given ordered extent.
+ *
+ * file_offset is updated to one byte past the range that is recorded as
+ * complete. This allows you to walk forward in the file.
+ */
+int btrfs_dec_test_first_ordered_pending(struct inode *inode,
+ struct btrfs_ordered_extent **cached,
+ u64 *file_offset, u64 io_size)
+{
+ struct btrfs_ordered_inode_tree *tree;
+ struct rb_node *node;
+ struct btrfs_ordered_extent *entry = NULL;
+ int ret;
+ u64 dec_end;
+ u64 dec_start;
+ u64 to_dec;
+
+ tree = &BTRFS_I(inode)->ordered_tree;
+ spin_lock(&tree->lock);
+ node = tree_search(tree, *file_offset);
+ if (!node) {
+ ret = 1;
+ goto out;
+ }
+
+ entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+ if (!offset_in_entry(entry, *file_offset)) {
+ ret = 1;
+ goto out;
+ }
+
+ dec_start = max(*file_offset, entry->file_offset);
+ dec_end = min(*file_offset + io_size, entry->file_offset +
+ entry->len);
+ *file_offset = dec_end;
+ if (dec_start > dec_end) {
+ printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n",
+ (unsigned long long)dec_start,
+ (unsigned long long)dec_end);
+ }
+ to_dec = dec_end - dec_start;
+ if (to_dec > entry->bytes_left) {
+ printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
+ (unsigned long long)entry->bytes_left,
+ (unsigned long long)to_dec);
+ }
+ entry->bytes_left -= to_dec;
+ if (entry->bytes_left == 0)
+ ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
+ else
+ ret = 1;
+out:
+ if (!ret && cached && entry) {
+ *cached = entry;
+ atomic_inc(&entry->refs);
+ }
+ spin_unlock(&tree->lock);
+ return ret == 0;
+}
+
+/*
+ * this is used to account for finished IO across a given range
* of the file. The IO should not span ordered extents. If
* a given ordered_extent is completely done, 1 is returned, otherwise
* 0.
@@ -526,7 +593,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
{
u64 end;
u64 orig_end;
- u64 wait_end;
struct btrfs_ordered_extent *ordered;
int found;
@@ -537,7 +603,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
if (orig_end > INT_LIMIT(loff_t))
orig_end = INT_LIMIT(loff_t);
}
- wait_end = orig_end;
again:
/* start IO across the range first to instantiate any delalloc
* extents
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 8ac3654..61dca83 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -141,6 +141,9 @@ int btrfs_remove_ordered_extent(struct inode *inode,
int btrfs_dec_test_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 file_offset, u64 io_size);
+int btrfs_dec_test_first_ordered_pending(struct inode *inode,
+ struct btrfs_ordered_extent **cached,
+ u64 *file_offset, u64 io_size);
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type);
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c
index 79cba5f..f8be250 100644
--- a/fs/btrfs/orphan.c
+++ b/fs/btrfs/orphan.c
@@ -56,8 +56,12 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
return -ENOMEM;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret)
+ if (ret < 0)
goto out;
+ if (ret) {
+ ret = -ENOENT;
+ goto out;
+ }
ret = btrfs_del_item(trans, root, path);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b37d723..045c9c2 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -29,6 +29,7 @@
#include "locking.h"
#include "btrfs_inode.h"
#include "async-thread.h"
+#include "free-space-cache.h"
/*
* backref_node, mapping_node and tree_block start with this
@@ -178,8 +179,6 @@ struct reloc_control {
u64 search_start;
u64 extents_found;
- int block_rsv_retries;
-
unsigned int stage:8;
unsigned int create_reloc_tree:1;
unsigned int merge_reloc_tree:1;
@@ -2133,7 +2132,6 @@ int prepare_to_merge(struct reloc_control *rc, int err)
LIST_HEAD(reloc_roots);
u64 num_bytes = 0;
int ret;
- int retries = 0;
mutex_lock(&root->fs_info->trans_mutex);
rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
@@ -2143,7 +2141,7 @@ again:
if (!err) {
num_bytes = rc->merging_rsv_size;
ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv,
- num_bytes, &retries);
+ num_bytes);
if (ret)
err = ret;
}
@@ -2155,7 +2153,6 @@ again:
btrfs_end_transaction(trans, rc->extent_root);
btrfs_block_rsv_release(rc->extent_root,
rc->block_rsv, num_bytes);
- retries = 0;
goto again;
}
}
@@ -2405,15 +2402,13 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
num_bytes = calcu_metadata_size(rc, node, 1) * 2;
trans->block_rsv = rc->block_rsv;
- ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes,
- &rc->block_rsv_retries);
+ ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes);
if (ret) {
if (ret == -EAGAIN)
rc->commit_transaction = 1;
return ret;
}
- rc->block_rsv_retries = 0;
return 0;
}
@@ -3099,6 +3094,8 @@ static int add_tree_block(struct reloc_control *rc,
BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
ret = get_ref_objectid_v0(rc, path, extent_key,
&ref_owner, NULL);
+ if (ret < 0)
+ return ret;
BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
level = (int)ref_owner;
/* FIXME: get real generation */
@@ -3191,6 +3188,54 @@ static int block_use_full_backref(struct reloc_control *rc,
return ret;
}
+static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
+ struct inode *inode, u64 ino)
+{
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ struct btrfs_root *root = fs_info->tree_root;
+ struct btrfs_trans_handle *trans;
+ unsigned long nr;
+ int ret = 0;
+
+ if (inode)
+ goto truncate;
+
+ key.objectid = ino;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+ if (!inode || IS_ERR(inode) || is_bad_inode(inode)) {
+ if (inode && !IS_ERR(inode))
+ iput(inode);
+ return -ENOENT;
+ }
+
+truncate:
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ trans = btrfs_join_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ btrfs_free_path(path);
+ goto out;
+ }
+
+ ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
+
+ btrfs_free_path(path);
+ nr = trans->blocks_used;
+ btrfs_end_transaction(trans, root);
+ btrfs_btree_balance_dirty(root, nr);
+out:
+ iput(inode);
+ return ret;
+}
+
/*
* helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
* this function scans fs tree to find blocks reference the data extent
@@ -3217,15 +3262,27 @@ static int find_data_references(struct reloc_control *rc,
int counted;
int ret;
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
ref_root = btrfs_extent_data_ref_root(leaf, ref);
ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
ref_count = btrfs_extent_data_ref_count(leaf, ref);
+ /*
+ * This is an extent belonging to the free space cache, lets just delete
+ * it and redo the search.
+ */
+ if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
+ ret = delete_block_group_cache(rc->extent_root->fs_info,
+ NULL, ref_objectid);
+ if (ret != -ENOENT)
+ return ret;
+ ret = 0;
+ }
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
root = read_fs_root(rc->extent_root->fs_info, ref_root);
if (IS_ERR(root)) {
err = PTR_ERR(root);
@@ -3554,8 +3611,7 @@ int prepare_to_relocate(struct reloc_control *rc)
* is no reservation in transaction handle.
*/
ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv,
- rc->extent_root->nodesize * 256,
- &rc->block_rsv_retries);
+ rc->extent_root->nodesize * 256);
if (ret)
return ret;
@@ -3567,7 +3623,6 @@ int prepare_to_relocate(struct reloc_control *rc)
rc->extents_found = 0;
rc->nodes_relocated = 0;
rc->merging_rsv_size = 0;
- rc->block_rsv_retries = 0;
rc->create_reloc_tree = 1;
set_reloc_control(rc);
@@ -3860,6 +3915,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
{
struct btrfs_fs_info *fs_info = extent_root->fs_info;
struct reloc_control *rc;
+ struct inode *inode;
+ struct btrfs_path *path;
int ret;
int rw = 0;
int err = 0;
@@ -3882,6 +3939,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
rw = 1;
}
+ path = btrfs_alloc_path();
+ if (!path) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group,
+ path);
+ btrfs_free_path(path);
+
+ if (!IS_ERR(inode))
+ ret = delete_block_group_cache(fs_info, inode, 0);
+ else
+ ret = PTR_ERR(inode);
+
+ if (ret && ret != -ENOENT) {
+ err = ret;
+ goto out;
+ }
+
rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
if (IS_ERR(rc->data_inode)) {
err = PTR_ERR(rc->data_inode);
@@ -4143,7 +4220,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
btrfs_add_ordered_sum(inode, ordered, sums);
}
btrfs_put_ordered_extent(ordered);
- return 0;
+ return ret;
}
void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 2d958be..6a1086e 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -181,7 +181,6 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid)
{
struct btrfs_root *dead_root;
- struct btrfs_item *item;
struct btrfs_root_item *ri;
struct btrfs_key key;
struct btrfs_key found_key;
@@ -214,7 +213,6 @@ again:
nritems = btrfs_header_nritems(leaf);
slot = path->slots[0];
}
- item = btrfs_item_nr(leaf, slot);
btrfs_item_key_to_cpu(leaf, &key, slot);
if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY)
goto next;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 144f8a5..883c6fa 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -61,6 +61,8 @@ static void btrfs_put_super(struct super_block *sb)
ret = close_ctree(root);
sb->s_fs_info = NULL;
+
+ (void)ret; /* FIXME: need to fix VFS to return error? */
}
enum {
@@ -68,7 +70,8 @@ enum {
Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
- Opt_discard, Opt_err,
+ Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err,
+ Opt_user_subvol_rm_allowed,
};
static match_table_t tokens = {
@@ -92,6 +95,9 @@ static match_table_t tokens = {
{Opt_flushoncommit, "flushoncommit"},
{Opt_ratio, "metadata_ratio=%d"},
{Opt_discard, "discard"},
+ {Opt_space_cache, "space_cache"},
+ {Opt_clear_cache, "clear_cache"},
+ {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
{Opt_err, NULL},
};
@@ -235,6 +241,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_discard:
btrfs_set_opt(info->mount_opt, DISCARD);
break;
+ case Opt_space_cache:
+ printk(KERN_INFO "btrfs: enabling disk space caching\n");
+ btrfs_set_opt(info->mount_opt, SPACE_CACHE);
+ break;
+ case Opt_clear_cache:
+ printk(KERN_INFO "btrfs: force clearing of disk cache\n");
+ btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
+ break;
+ case Opt_user_subvol_rm_allowed:
+ btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
+ break;
case Opt_err:
printk(KERN_INFO "btrfs: unrecognized mount option "
"'%s'\n", p);
@@ -380,7 +397,7 @@ static struct dentry *get_default_root(struct super_block *sb,
find_root:
new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
if (IS_ERR(new_root))
- return ERR_PTR(PTR_ERR(new_root));
+ return ERR_CAST(new_root);
if (btrfs_root_refs(&new_root->root_item) == 0)
return ERR_PTR(-ENOENT);
@@ -436,7 +453,6 @@ static int btrfs_fill_super(struct super_block *sb,
{
struct inode *inode;
struct dentry *root_dentry;
- struct btrfs_super_block *disk_super;
struct btrfs_root *tree_root;
struct btrfs_key key;
int err;
@@ -458,7 +474,6 @@ static int btrfs_fill_super(struct super_block *sb,
return PTR_ERR(tree_root);
}
sb->s_fs_info = tree_root;
- disk_super = &tree_root->fs_info->super_copy;
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_ITEM_KEY;
@@ -548,30 +563,45 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
static int btrfs_test_super(struct super_block *s, void *data)
{
- struct btrfs_fs_devices *test_fs_devices = data;
+ struct btrfs_root *test_root = data;
struct btrfs_root *root = btrfs_sb(s);
- return root->fs_info->fs_devices == test_fs_devices;
+ /*
+ * If this super block is going away, return false as it
+ * can't match as an existing super block.
+ */
+ if (!atomic_read(&s->s_active))
+ return 0;
+ return root->fs_info->fs_devices == test_root->fs_info->fs_devices;
+}
+
+static int btrfs_set_super(struct super_block *s, void *data)
+{
+ s->s_fs_info = data;
+
+ return set_anon_super(s, data);
}
+
/*
* Find a superblock for the given device / mount point.
*
* Note: This is based on get_sb_bdev from fs/super.c with a few additions
* for multiple device setup. Make sure to keep it in sync.
*/
-static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
{
struct block_device *bdev = NULL;
struct super_block *s;
struct dentry *root;
struct btrfs_fs_devices *fs_devices = NULL;
+ struct btrfs_root *tree_root = NULL;
+ struct btrfs_fs_info *fs_info = NULL;
fmode_t mode = FMODE_READ;
char *subvol_name = NULL;
u64 subvol_objectid = 0;
int error = 0;
- int found = 0;
if (!(flags & MS_RDONLY))
mode |= FMODE_WRITE;
@@ -580,7 +610,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
&subvol_name, &subvol_objectid,
&fs_devices);
if (error)
- return error;
+ return ERR_PTR(error);
error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices);
if (error)
@@ -595,8 +625,24 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
goto error_close_devices;
}
+ /*
+ * Setup a dummy root and fs_info for test/set super. This is because
+ * we don't actually fill this stuff out until open_ctree, but we need
+ * it for searching for existing supers, so this lets us do that and
+ * then open_ctree will properly initialize everything later.
+ */
+ fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS);
+ tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+ if (!fs_info || !tree_root) {
+ error = -ENOMEM;
+ goto error_close_devices;
+ }
+ fs_info->tree_root = tree_root;
+ fs_info->fs_devices = fs_devices;
+ tree_root->fs_info = fs_info;
+
bdev = fs_devices->latest_bdev;
- s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices);
+ s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root);
if (IS_ERR(s))
goto error_s;
@@ -607,7 +653,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
goto error_close_devices;
}
- found = 1;
btrfs_close_devices(fs_devices);
} else {
char b[BDEVNAME_SIZE];
@@ -629,7 +674,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
if (IS_ERR(root)) {
error = PTR_ERR(root);
deactivate_locked_super(s);
- goto error;
+ goto error_free_subvol_name;
}
/* if they gave us a subvolume name bind mount into that */
if (strcmp(subvol_name, ".")) {
@@ -640,36 +685,34 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
mutex_unlock(&root->d_inode->i_mutex);
if (IS_ERR(new_root)) {
+ dput(root);
deactivate_locked_super(s);
error = PTR_ERR(new_root);
- dput(root);
- goto error_close_devices;
+ goto error_free_subvol_name;
}
if (!new_root->d_inode) {
dput(root);
dput(new_root);
deactivate_locked_super(s);
error = -ENXIO;
- goto error_close_devices;
+ goto error_free_subvol_name;
}
dput(root);
root = new_root;
}
- mnt->mnt_sb = s;
- mnt->mnt_root = root;
-
kfree(subvol_name);
- return 0;
+ return root;
error_s:
error = PTR_ERR(s);
error_close_devices:
btrfs_close_devices(fs_devices);
+ kfree(fs_info);
+ kfree(tree_root);
error_free_subvol_name:
kfree(subvol_name);
-error:
- return error;
+ return ERR_PTR(error);
}
static int btrfs_remount(struct super_block *sb, int *flags, char *data)
@@ -716,18 +759,25 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
struct list_head *head = &root->fs_info->space_info;
struct btrfs_space_info *found;
u64 total_used = 0;
+ u64 total_used_data = 0;
int bits = dentry->d_sb->s_blocksize_bits;
__be32 *fsid = (__be32 *)root->fs_info->fsid;
rcu_read_lock();
- list_for_each_entry_rcu(found, head, list)
+ list_for_each_entry_rcu(found, head, list) {
+ if (found->flags & (BTRFS_BLOCK_GROUP_METADATA |
+ BTRFS_BLOCK_GROUP_SYSTEM))
+ total_used_data += found->disk_total;
+ else
+ total_used_data += found->disk_used;
total_used += found->disk_used;
+ }
rcu_read_unlock();
buf->f_namelen = BTRFS_NAME_LEN;
buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
buf->f_bfree = buf->f_blocks - (total_used >> bits);
- buf->f_bavail = buf->f_bfree;
+ buf->f_bavail = buf->f_blocks - (total_used_data >> bits);
buf->f_bsize = dentry->d_sb->s_blocksize;
buf->f_type = BTRFS_SUPER_MAGIC;
@@ -746,7 +796,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
static struct file_system_type btrfs_fs_type = {
.owner = THIS_MODULE,
.name = "btrfs",
- .get_sb = btrfs_get_sb,
+ .mount = btrfs_mount,
.kill_sb = kill_anon_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 66e4c66..f50e931 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -163,6 +163,7 @@ enum btrfs_trans_type {
TRANS_START,
TRANS_JOIN,
TRANS_USERSPACE,
+ TRANS_JOIN_NOLOCK,
};
static int may_wait_transaction(struct btrfs_root *root, int type)
@@ -179,14 +180,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
{
struct btrfs_trans_handle *h;
struct btrfs_transaction *cur_trans;
- int retries = 0;
int ret;
again:
h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
if (!h)
return ERR_PTR(-ENOMEM);
- mutex_lock(&root->fs_info->trans_mutex);
+ if (type != TRANS_JOIN_NOLOCK)
+ mutex_lock(&root->fs_info->trans_mutex);
if (may_wait_transaction(root, type))
wait_current_trans(root);
@@ -195,7 +196,8 @@ again:
cur_trans = root->fs_info->running_transaction;
cur_trans->use_count++;
- mutex_unlock(&root->fs_info->trans_mutex);
+ if (type != TRANS_JOIN_NOLOCK)
+ mutex_unlock(&root->fs_info->trans_mutex);
h->transid = cur_trans->transid;
h->transaction = cur_trans;
@@ -212,8 +214,7 @@ again:
}
if (num_items > 0) {
- ret = btrfs_trans_reserve_metadata(h, root, num_items,
- &retries);
+ ret = btrfs_trans_reserve_metadata(h, root, num_items);
if (ret == -EAGAIN) {
btrfs_commit_transaction(h, root);
goto again;
@@ -224,9 +225,11 @@ again:
}
}
- mutex_lock(&root->fs_info->trans_mutex);
+ if (type != TRANS_JOIN_NOLOCK)
+ mutex_lock(&root->fs_info->trans_mutex);
record_root_in_trans(h, root);
- mutex_unlock(&root->fs_info->trans_mutex);
+ if (type != TRANS_JOIN_NOLOCK)
+ mutex_unlock(&root->fs_info->trans_mutex);
if (!current->journal_info && type != TRANS_USERSPACE)
current->journal_info = h;
@@ -244,6 +247,12 @@ struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
return start_transaction(root, 0, TRANS_JOIN);
}
+struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
+ int num_blocks)
+{
+ return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
+}
+
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
int num_blocks)
{
@@ -270,6 +279,58 @@ static noinline int wait_for_commit(struct btrfs_root *root,
return 0;
}
+int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
+{
+ struct btrfs_transaction *cur_trans = NULL, *t;
+ int ret;
+
+ mutex_lock(&root->fs_info->trans_mutex);
+
+ ret = 0;
+ if (transid) {
+ if (transid <= root->fs_info->last_trans_committed)
+ goto out_unlock;
+
+ /* find specified transaction */
+ list_for_each_entry(t, &root->fs_info->trans_list, list) {
+ if (t->transid == transid) {
+ cur_trans = t;
+ break;
+ }
+ if (t->transid > transid)
+ break;
+ }
+ ret = -EINVAL;
+ if (!cur_trans)
+ goto out_unlock; /* bad transid */
+ } else {
+ /* find newest transaction that is committing | committed */
+ list_for_each_entry_reverse(t, &root->fs_info->trans_list,
+ list) {
+ if (t->in_commit) {
+ if (t->commit_done)
+ goto out_unlock;
+ cur_trans = t;
+ break;
+ }
+ }
+ if (!cur_trans)
+ goto out_unlock; /* nothing committing|committed */
+ }
+
+ cur_trans->use_count++;
+ mutex_unlock(&root->fs_info->trans_mutex);
+
+ wait_for_commit(root, cur_trans);
+
+ mutex_lock(&root->fs_info->trans_mutex);
+ put_transaction(cur_trans);
+ ret = 0;
+out_unlock:
+ mutex_unlock(&root->fs_info->trans_mutex);
+ return ret;
+}
+
#if 0
/*
* rate limit against the drop_snapshot code. This helps to slow down new
@@ -348,7 +409,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
}
static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, int throttle)
+ struct btrfs_root *root, int throttle, int lock)
{
struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_fs_info *info = root->fs_info;
@@ -376,26 +437,29 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
btrfs_trans_release_metadata(trans, root);
- if (!root->fs_info->open_ioctl_trans &&
+ if (lock && !root->fs_info->open_ioctl_trans &&
should_end_transaction(trans, root))
trans->transaction->blocked = 1;
- if (cur_trans->blocked && !cur_trans->in_commit) {
+ if (lock && cur_trans->blocked && !cur_trans->in_commit) {
if (throttle)
return btrfs_commit_transaction(trans, root);
else
wake_up_process(info->transaction_kthread);
}
- mutex_lock(&info->trans_mutex);
+ if (lock)
+ mutex_lock(&info->trans_mutex);
WARN_ON(cur_trans != info->running_transaction);
WARN_ON(cur_trans->num_writers < 1);
cur_trans->num_writers--;
+ smp_mb();
if (waitqueue_active(&cur_trans->writer_wait))
wake_up(&cur_trans->writer_wait);
put_transaction(cur_trans);
- mutex_unlock(&info->trans_mutex);
+ if (lock)
+ mutex_unlock(&info->trans_mutex);
if (current->journal_info == trans)
current->journal_info = NULL;
@@ -411,13 +475,19 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- return __btrfs_end_transaction(trans, root, 0);
+ return __btrfs_end_transaction(trans, root, 0, 1);
}
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- return __btrfs_end_transaction(trans, root, 1);
+ return __btrfs_end_transaction(trans, root, 1, 1);
+}
+
+int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ return __btrfs_end_transaction(trans, root, 0, 0);
}
/*
@@ -832,11 +902,11 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_root *root = pending->root;
struct btrfs_root *parent_root;
struct inode *parent_inode;
+ struct dentry *parent;
struct dentry *dentry;
struct extent_buffer *tmp;
struct extent_buffer *old;
int ret;
- int retries = 0;
u64 to_reserve = 0;
u64 index = 0;
u64 objectid;
@@ -858,7 +928,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
if (to_reserve > 0) {
ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
- to_reserve, &retries);
+ to_reserve);
if (ret) {
pending->error = ret;
goto fail;
@@ -872,7 +942,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
trans->block_rsv = &pending->block_rsv;
dentry = pending->dentry;
- parent_inode = dentry->d_parent->d_inode;
+ parent = dget_parent(dentry);
+ parent_inode = parent->d_inode;
parent_root = BTRFS_I(parent_inode)->root;
record_root_in_trans(trans, parent_root);
@@ -920,6 +991,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
parent_inode->i_ino, index,
dentry->d_name.name, dentry->d_name.len);
BUG_ON(ret);
+ dput(parent);
key.offset = (u64)-1;
pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
@@ -966,6 +1038,8 @@ static void update_super_roots(struct btrfs_root *root)
super->root = root_item->bytenr;
super->generation = root_item->generation;
super->root_level = root_item->level;
+ if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE))
+ super->cache_generation = root_item->generation;
}
int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@@ -988,11 +1062,127 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info)
return ret;
}
+/*
+ * wait for the current transaction commit to start and block subsequent
+ * transaction joins
+ */
+static void wait_current_trans_commit_start(struct btrfs_root *root,
+ struct btrfs_transaction *trans)
+{
+ DEFINE_WAIT(wait);
+
+ if (trans->in_commit)
+ return;
+
+ while (1) {
+ prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (trans->in_commit) {
+ finish_wait(&root->fs_info->transaction_blocked_wait,
+ &wait);
+ break;
+ }
+ mutex_unlock(&root->fs_info->trans_mutex);
+ schedule();
+ mutex_lock(&root->fs_info->trans_mutex);
+ finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
+ }
+}
+
+/*
+ * wait for the current transaction to start and then become unblocked.
+ * caller holds ref.
+ */
+static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
+ struct btrfs_transaction *trans)
+{
+ DEFINE_WAIT(wait);
+
+ if (trans->commit_done || (trans->in_commit && !trans->blocked))
+ return;
+
+ while (1) {
+ prepare_to_wait(&root->fs_info->transaction_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (trans->commit_done ||
+ (trans->in_commit && !trans->blocked)) {
+ finish_wait(&root->fs_info->transaction_wait,
+ &wait);
+ break;
+ }
+ mutex_unlock(&root->fs_info->trans_mutex);
+ schedule();
+ mutex_lock(&root->fs_info->trans_mutex);
+ finish_wait(&root->fs_info->transaction_wait,
+ &wait);
+ }
+}
+
+/*
+ * commit transactions asynchronously. once btrfs_commit_transaction_async
+ * returns, any subsequent transaction will not be allowed to join.
+ */
+struct btrfs_async_commit {
+ struct btrfs_trans_handle *newtrans;
+ struct btrfs_root *root;
+ struct delayed_work work;
+};
+
+static void do_async_commit(struct work_struct *work)
+{
+ struct btrfs_async_commit *ac =
+ container_of(work, struct btrfs_async_commit, work.work);
+
+ btrfs_commit_transaction(ac->newtrans, ac->root);
+ kfree(ac);
+}
+
+int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ int wait_for_unblock)
+{
+ struct btrfs_async_commit *ac;
+ struct btrfs_transaction *cur_trans;
+
+ ac = kmalloc(sizeof(*ac), GFP_NOFS);
+ BUG_ON(!ac);
+
+ INIT_DELAYED_WORK(&ac->work, do_async_commit);
+ ac->root = root;
+ ac->newtrans = btrfs_join_transaction(root, 0);
+
+ /* take transaction reference */
+ mutex_lock(&root->fs_info->trans_mutex);
+ cur_trans = trans->transaction;
+ cur_trans->use_count++;
+ mutex_unlock(&root->fs_info->trans_mutex);
+
+ btrfs_end_transaction(trans, root);
+ schedule_delayed_work(&ac->work, 0);
+
+ /* wait for transaction to start and unblock */
+ mutex_lock(&root->fs_info->trans_mutex);
+ if (wait_for_unblock)
+ wait_current_trans_commit_start_and_unblock(root, cur_trans);
+ else
+ wait_current_trans_commit_start(root, cur_trans);
+ put_transaction(cur_trans);
+ mutex_unlock(&root->fs_info->trans_mutex);
+
+ return 0;
+}
+
+/*
+ * btrfs_transaction state sequence:
+ * in_commit = 0, blocked = 0 (initial)
+ * in_commit = 1, blocked = 1
+ * blocked = 0
+ * commit_done = 1
+ */
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
unsigned long joined = 0;
- unsigned long timeout = 1;
struct btrfs_transaction *cur_trans;
struct btrfs_transaction *prev_trans = NULL;
DEFINE_WAIT(wait);
@@ -1039,6 +1229,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
trans->transaction->in_commit = 1;
trans->transaction->blocked = 1;
+ wake_up(&root->fs_info->transaction_blocked_wait);
+
if (cur_trans->list.prev != &root->fs_info->trans_list) {
prev_trans = list_entry(cur_trans->list.prev,
struct btrfs_transaction, list);
@@ -1063,11 +1255,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
snap_pending = 1;
WARN_ON(cur_trans != trans->transaction);
- if (cur_trans->num_writers > 1)
- timeout = MAX_SCHEDULE_TIMEOUT;
- else if (should_grow)
- timeout = 1;
-
mutex_unlock(&root->fs_info->trans_mutex);
if (flush_on_commit || snap_pending) {
@@ -1089,8 +1276,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
TASK_UNINTERRUPTIBLE);
smp_mb();
- if (cur_trans->num_writers > 1 || should_grow)
- schedule_timeout(timeout);
+ if (cur_trans->num_writers > 1)
+ schedule_timeout(MAX_SCHEDULE_TIMEOUT);
+ else if (should_grow)
+ schedule_timeout(1);
mutex_lock(&root->fs_info->trans_mutex);
finish_wait(&cur_trans->writer_wait, &wait);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index e104986..f104b57 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -87,12 +87,17 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
+int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
int num_items);
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
int num_blocks);
+struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
+ int num_blocks);
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
int num_blocks);
+int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
@@ -104,6 +109,9 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
int btrfs_clean_old_snapshots(struct btrfs_root *root);
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
+int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ int wait_for_unblock);
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index f7ac8e0..992ab42 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -36,7 +36,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
int ret = 0;
int wret;
int level;
- int orig_level;
int is_extent = 0;
int next_key_ret = 0;
u64 last_ret = 0;
@@ -64,7 +63,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
return -ENOMEM;
level = btrfs_header_level(root->node);
- orig_level = level;
if (level == 0)
goto out;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index fb102a9..054744a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -786,7 +786,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
{
struct inode *dir;
int ret;
- struct btrfs_key location;
struct btrfs_inode_ref *ref;
struct btrfs_dir_item *di;
struct inode *inode;
@@ -795,10 +794,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
unsigned long ref_ptr;
unsigned long ref_end;
- location.objectid = key->objectid;
- location.type = BTRFS_INODE_ITEM_KEY;
- location.offset = 0;
-
/*
* it is possible that we didn't log all the parent directories
* for a given inode. If we don't find the dir, just don't
@@ -1583,7 +1578,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
struct btrfs_path *path;
struct btrfs_root *root = wc->replay_dest;
struct btrfs_key key;
- u32 item_size;
int level;
int i;
int ret;
@@ -1601,7 +1595,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
nritems = btrfs_header_nritems(eb);
for (i = 0; i < nritems; i++) {
btrfs_item_key_to_cpu(eb, &key, i);
- item_size = btrfs_item_size_nr(eb, i);
/* inode keys are done during the first stage */
if (key.type == BTRFS_INODE_ITEM_KEY &&
@@ -1668,7 +1661,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
struct walk_control *wc)
{
u64 root_owner;
- u64 root_gen;
u64 bytenr;
u64 ptr_gen;
struct extent_buffer *next;
@@ -1698,7 +1690,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
parent = path->nodes[*level];
root_owner = btrfs_header_owner(parent);
- root_gen = btrfs_header_generation(parent);
next = btrfs_find_create_tree_block(root, bytenr, blocksize);
@@ -1749,7 +1740,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
struct walk_control *wc)
{
u64 root_owner;
- u64 root_gen;
int i;
int slot;
int ret;
@@ -1757,8 +1747,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
slot = path->slots[i];
if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
- struct extent_buffer *node;
- node = path->nodes[i];
path->slots[i]++;
*level = i;
WARN_ON(*level == 0);
@@ -1771,7 +1759,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
parent = path->nodes[*level + 1];
root_owner = btrfs_header_owner(parent);
- root_gen = btrfs_header_generation(parent);
wc->process_func(root, path->nodes[*level], wc,
btrfs_header_generation(path->nodes[*level]));
if (wc->free) {
@@ -2273,7 +2260,7 @@ fail:
}
btrfs_end_log_trans(root);
- return 0;
+ return err;
}
/* see comments for btrfs_del_dir_entries_in_log */
@@ -2729,7 +2716,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_key max_key;
struct btrfs_root *log = root->log_root;
struct extent_buffer *src = NULL;
- u32 size;
int err = 0;
int ret;
int nritems;
@@ -2793,7 +2779,6 @@ again:
break;
src = path->nodes[0];
- size = btrfs_item_size_nr(src, path->slots[0]);
if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
ins_nr++;
goto next_slot;
@@ -2884,6 +2869,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
{
int ret = 0;
struct btrfs_root *root;
+ struct dentry *old_parent = NULL;
/*
* for regular files, if its inode is already on disk, we don't
@@ -2925,10 +2911,13 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
if (IS_ROOT(parent))
break;
- parent = parent->d_parent;
+ parent = dget_parent(parent);
+ dput(old_parent);
+ old_parent = parent;
inode = parent->d_inode;
}
+ dput(old_parent);
out:
return ret;
}
@@ -2960,6 +2949,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
{
int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
struct super_block *sb;
+ struct dentry *old_parent = NULL;
int ret = 0;
u64 last_committed = root->fs_info->last_trans_committed;
@@ -3031,10 +3021,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (IS_ROOT(parent))
break;
- parent = parent->d_parent;
+ parent = dget_parent(parent);
+ dput(old_parent);
+ old_parent = parent;
}
ret = 0;
end_trans:
+ dput(old_parent);
if (ret < 0) {
BUG_ON(ret != -ENOSPC);
root->fs_info->last_trans_log_full_commit = trans->transid;
@@ -3054,8 +3047,13 @@ end_no_trans:
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct dentry *dentry)
{
- return btrfs_log_inode_parent(trans, root, dentry->d_inode,
- dentry->d_parent, 0);
+ struct dentry *parent = dget_parent(dentry);
+ int ret;
+
+ ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0);
+ dput(parent);
+
+ return ret;
}
/*
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e25e46a..6b98845 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -412,12 +412,16 @@ static noinline int device_list_add(const char *path,
device->fs_devices = fs_devices;
fs_devices->num_devices++;
- } else if (strcmp(device->name, path)) {
+ } else if (!device->name || strcmp(device->name, path)) {
name = kstrdup(path, GFP_NOFS);
if (!name)
return -ENOMEM;
kfree(device->name);
device->name = name;
+ if (device->missing) {
+ fs_devices->missing_devices--;
+ device->missing = 0;
+ }
}
if (found_transid > fs_devices->latest_trans) {
@@ -1236,6 +1240,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
device->fs_devices->num_devices--;
+ if (device->missing)
+ root->fs_info->fs_devices->missing_devices--;
+
next_device = list_entry(root->fs_info->fs_devices->devices.next,
struct btrfs_device, dev_list);
if (device->bdev == root->fs_info->sb->s_bdev)
@@ -1898,7 +1905,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
u64 size_to_free;
struct btrfs_path *path;
struct btrfs_key key;
- struct btrfs_chunk *chunk;
struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
struct btrfs_trans_handle *trans;
struct btrfs_key found_key;
@@ -1962,9 +1968,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
if (found_key.objectid != key.objectid)
break;
- chunk = btrfs_item_ptr(path->nodes[0],
- path->slots[0],
- struct btrfs_chunk);
/* chunk zero is special */
if (found_key.offset == 0)
break;
@@ -3031,8 +3034,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
}
bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
dev = multi->stripes[dev_nr].dev;
- BUG_ON(rw == WRITE && !dev->writeable);
- if (dev && dev->bdev) {
+ if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
bio->bi_bdev = dev->bdev;
if (async_submit)
schedule_bio(root, dev, rw, bio);
@@ -3085,7 +3087,9 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
device->devid = devid;
device->work.func = pending_bios_fn;
device->fs_devices = fs_devices;
+ device->missing = 1;
fs_devices->num_devices++;
+ fs_devices->missing_devices++;
spin_lock_init(&device->io_lock);
INIT_LIST_HEAD(&device->dev_alloc_list);
memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
@@ -3283,6 +3287,15 @@ static int read_one_dev(struct btrfs_root *root,
device = add_missing_dev(root, devid, dev_uuid);
if (!device)
return -ENOMEM;
+ } else if (!device->missing) {
+ /*
+ * this happens when a device that was properly setup
+ * in the device info lists suddenly goes bad.
+ * device->bdev is NULL, and so we have to set
+ * device->missing to one here
+ */
+ root->fs_info->fs_devices->missing_devices++;
+ device->missing = 1;
}
}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2b638b6..2740db49 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -44,6 +44,7 @@ struct btrfs_device {
int writeable;
int in_fs_metadata;
+ int missing;
spinlock_t io_lock;
@@ -93,6 +94,7 @@ struct btrfs_fs_devices {
u64 num_devices;
u64 open_devices;
u64 rw_devices;
+ u64 missing_devices;
u64 total_rw_bytes;
struct block_device *latest_bdev;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 88ecbb2..698fdd2 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -178,7 +178,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
struct inode *inode = dentry->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path;
- struct btrfs_item *item;
struct extent_buffer *leaf;
struct btrfs_dir_item *di;
int ret = 0, slot, advance;
@@ -234,7 +233,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
}
advance = 1;
- item = btrfs_item_nr(leaf, slot);
btrfs_item_key_to_cpu(leaf, &found_key, slot);
/* check to make sure this item is what we want */
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 3e2b90e..b9cd544 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -199,8 +199,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
int nr_pages = 0;
struct page *in_page = NULL;
struct page *out_page = NULL;
- int out_written = 0;
- int in_read = 0;
unsigned long bytes_left;
*out_pages = 0;
@@ -233,9 +231,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
- out_written = 0;
- in_read = 0;
-
while (workspace->def_strm.total_in < len) {
ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
if (ret != Z_OK) {
diff --git a/fs/buffer.c b/fs/buffer.c
index 5930e38..2219a76 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1270,12 +1270,10 @@ static inline void check_irqs_on(void)
static void bh_lru_install(struct buffer_head *bh)
{
struct buffer_head *evictee = NULL;
- struct bh_lru *lru;
check_irqs_on();
bh_lru_lock();
- lru = &__get_cpu_var(bh_lrus);
- if (lru->bhs[0] != bh) {
+ if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
struct buffer_head *bhs[BH_LRU_SIZE];
int in;
int out = 0;
@@ -1283,7 +1281,8 @@ static void bh_lru_install(struct buffer_head *bh)
get_bh(bh);
bhs[out++] = bh;
for (in = 0; in < BH_LRU_SIZE; in++) {
- struct buffer_head *bh2 = lru->bhs[in];
+ struct buffer_head *bh2 =
+ __this_cpu_read(bh_lrus.bhs[in]);
if (bh2 == bh) {
__brelse(bh2);
@@ -1298,7 +1297,7 @@ static void bh_lru_install(struct buffer_head *bh)
}
while (out < BH_LRU_SIZE)
bhs[out++] = NULL;
- memcpy(lru->bhs, bhs, sizeof(bhs));
+ memcpy(__this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
}
bh_lru_unlock();
@@ -1313,23 +1312,22 @@ static struct buffer_head *
lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
{
struct buffer_head *ret = NULL;
- struct bh_lru *lru;
unsigned int i;
check_irqs_on();
bh_lru_lock();
- lru = &__get_cpu_var(bh_lrus);
for (i = 0; i < BH_LRU_SIZE; i++) {
- struct buffer_head *bh = lru->bhs[i];
+ struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
if (bh && bh->b_bdev == bdev &&
bh->b_blocknr == block && bh->b_size == size) {
if (i) {
while (i) {
- lru->bhs[i] = lru->bhs[i - 1];
+ __this_cpu_write(bh_lrus.bhs[i],
+ __this_cpu_read(bh_lrus.bhs[i - 1]));
i--;
}
- lru->bhs[0] = bh;
+ __this_cpu_write(bh_lrus.bhs[0], bh);
}
get_bh(bh);
ret = bh;
@@ -3203,22 +3201,23 @@ static void recalc_bh_state(void)
int i;
int tot = 0;
- if (__get_cpu_var(bh_accounting).ratelimit++ < 4096)
+ if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
return;
- __get_cpu_var(bh_accounting).ratelimit = 0;
+ __this_cpu_write(bh_accounting.ratelimit, 0);
for_each_online_cpu(i)
tot += per_cpu(bh_accounting, i).nr;
buffer_heads_over_limit = (tot > max_buffer_heads);
}
-
+
struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
{
struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
if (ret) {
INIT_LIST_HEAD(&ret->b_assoc_buffers);
- get_cpu_var(bh_accounting).nr++;
+ preempt_disable();
+ __this_cpu_inc(bh_accounting.nr);
recalc_bh_state();
- put_cpu_var(bh_accounting);
+ preempt_enable();
}
return ret;
}
@@ -3228,9 +3227,10 @@ void free_buffer_head(struct buffer_head *bh)
{
BUG_ON(!list_empty(&bh->b_assoc_buffers));
kmem_cache_free(bh_cachep, bh);
- get_cpu_var(bh_accounting).nr--;
+ preempt_disable();
+ __this_cpu_dec(bh_accounting.nr);
recalc_bh_state();
- put_cpu_var(bh_accounting);
+ preempt_enable();
}
EXPORT_SYMBOL(free_buffer_head);
@@ -3243,9 +3243,8 @@ static void buffer_exit_cpu(int cpu)
brelse(b->bhs[i]);
b->bhs[i] = NULL;
}
- get_cpu_var(bh_accounting).nr += per_cpu(bh_accounting, cpu).nr;
+ this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
per_cpu(bh_accounting, cpu).nr = 0;
- put_cpu_var(bh_accounting);
}
static int buffer_cpu_notify(struct notifier_block *self,
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
index 9e6c4f2..bd35212 100644
--- a/fs/ceph/Makefile
+++ b/fs/ceph/Makefile
@@ -2,31 +2,10 @@
# Makefile for CEPH filesystem.
#
-ifneq ($(KERNELRELEASE),)
-
obj-$(CONFIG_CEPH_FS) += ceph.o
-ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
+ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
export.o caps.o snap.o xattr.o \
mds_client.o mdsmap.o strings.o ceph_frag.o \
debugfs.o
-else
-#Otherwise we were called directly from the command
-# line; invoke the kernel build system.
-
-KERNELDIR ?= /lib/modules/$(shell uname -r)/build
-PWD := $(shell pwd)
-
-default: all
-
-all:
- $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_FS=m modules
-
-modules_install:
- $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_FS=m modules_install
-
-clean:
- $(MAKE) -C $(KERNELDIR) M=$(PWD) clean
-
-endif
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index e9c874a..561438b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -204,7 +204,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
page->index << PAGE_CACHE_SHIFT, &len,
ci->i_truncate_seq, ci->i_truncate_size,
- &page, 1);
+ &page, 1, 0);
if (err == -ENOENT)
err = 0;
if (err < 0) {
@@ -287,7 +287,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
offset, &len,
ci->i_truncate_seq, ci->i_truncate_size,
- pages, nr_pages);
+ pages, nr_pages, 0);
if (rc == -ENOENT)
rc = 0;
if (rc < 0)
@@ -774,7 +774,7 @@ get_more_pages:
snapc, do_sync,
ci->i_truncate_seq,
ci->i_truncate_size,
- &inode->i_mtime, true, 1);
+ &inode->i_mtime, true, 1, 0);
max_pages = req->r_num_pages;
alloc_page_vec(fsc, req);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 98ab13e..60d27bc 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1430,8 +1430,8 @@ static int try_nonblocking_invalidate(struct inode *inode)
invalidating_gen == ci->i_rdcache_gen) {
/* success. */
dout("try_nonblocking_invalidate %p success\n", inode);
- ci->i_rdcache_gen = 0;
- ci->i_rdcache_revoking = 0;
+ /* save any racing async invalidate some trouble */
+ ci->i_rdcache_revoking = ci->i_rdcache_gen - 1;
return 0;
}
dout("try_nonblocking_invalidate %p failed\n", inode);
@@ -2273,8 +2273,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
{
struct ceph_inode_info *ci = ceph_inode(inode);
int mds = session->s_mds;
- unsigned seq = le32_to_cpu(grant->seq);
- unsigned issue_seq = le32_to_cpu(grant->issue_seq);
+ int seq = le32_to_cpu(grant->seq);
int newcaps = le32_to_cpu(grant->caps);
int issued, implemented, used, wanted, dirty;
u64 size = le64_to_cpu(grant->size);
@@ -2286,8 +2285,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
int revoked_rdcache = 0;
int queue_invalidate = 0;
- dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n",
- inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps));
+ dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
+ inode, cap, mds, seq, ceph_cap_string(newcaps));
dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
inode->i_size);
@@ -2383,7 +2382,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
}
cap->seq = seq;
- cap->issue_seq = issue_seq;
/* file layout may have changed */
ci->i_layout = grant->layout;
@@ -2691,6 +2689,11 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
NULL /* no caps context */);
try_flush_caps(inode, session, NULL);
up_read(&mdsc->snap_rwsem);
+
+ /* make sure we re-request max_size, if necessary */
+ spin_lock(&inode->i_lock);
+ ci->i_requested_max_size = 0;
+ spin_unlock(&inode->i_lock);
}
/*
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 7ae1b3d..08f65fa 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -60,10 +60,13 @@ static int mdsc_show(struct seq_file *s, void *p)
for (rp = rb_first(&mdsc->request_tree); rp; rp = rb_next(rp)) {
req = rb_entry(rp, struct ceph_mds_request, r_node);
- if (req->r_request)
- seq_printf(s, "%lld\tmds%d\t", req->r_tid, req->r_mds);
- else
+ if (req->r_request && req->r_session)
+ seq_printf(s, "%lld\tmds%d\t", req->r_tid,
+ req->r_session->s_mds);
+ else if (!req->r_request)
seq_printf(s, "%lld\t(no request)\t", req->r_tid);
+ else
+ seq_printf(s, "%lld\t(no session)\t", req->r_tid);
seq_printf(s, "%s", ceph_mds_op_name(req->r_op));
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index e0a2dc6..0bc68de 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -40,12 +40,13 @@ int ceph_init_dentry(struct dentry *dentry)
if (dentry->d_fsdata)
return 0;
- if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
- dentry->d_op = &ceph_dentry_ops;
+ if (dentry->d_parent == NULL || /* nfs fh_to_dentry */
+ ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
+ d_set_d_op(dentry, &ceph_dentry_ops);
else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
- dentry->d_op = &ceph_snapdir_dentry_ops;
+ d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
else
- dentry->d_op = &ceph_snap_dentry_ops;
+ d_set_d_op(dentry, &ceph_snap_dentry_ops);
di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
if (!di)
@@ -111,11 +112,11 @@ static int __dcache_readdir(struct file *filp,
dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos,
last);
- spin_lock(&dcache_lock);
+ spin_lock(&parent->d_lock);
/* start at beginning? */
- if (filp->f_pos == 2 || (last &&
- filp->f_pos < ceph_dentry(last)->offset)) {
+ if (filp->f_pos == 2 || last == NULL ||
+ filp->f_pos < ceph_dentry(last)->offset) {
if (list_empty(&parent->d_subdirs))
goto out_unlock;
p = parent->d_subdirs.prev;
@@ -135,6 +136,7 @@ more:
fi->at_end = 1;
goto out_unlock;
}
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
if (!d_unhashed(dentry) && dentry->d_inode &&
ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
@@ -144,13 +146,15 @@ more:
dentry->d_name.len, dentry->d_name.name, di->offset,
filp->f_pos, d_unhashed(dentry) ? " unhashed" : "",
!dentry->d_inode ? " null" : "");
+ spin_unlock(&dentry->d_lock);
p = p->prev;
dentry = list_entry(p, struct dentry, d_u.d_child);
di = ceph_dentry(dentry);
}
- atomic_inc(&dentry->d_count);
- spin_unlock(&dcache_lock);
+ dget_dlock(dentry);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&parent->d_lock);
dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos,
dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
@@ -176,19 +180,19 @@ more:
filp->f_pos++;
- /* make sure a dentry wasn't dropped while we didn't have dcache_lock */
+ /* make sure a dentry wasn't dropped while we didn't have parent lock */
if (!ceph_i_test(dir, CEPH_I_COMPLETE)) {
dout(" lost I_COMPLETE on %p; falling back to mds\n", dir);
err = -EAGAIN;
goto out;
}
- spin_lock(&dcache_lock);
+ spin_lock(&parent->d_lock);
p = p->prev; /* advance to next dentry */
goto more;
out_unlock:
- spin_unlock(&dcache_lock);
+ spin_unlock(&parent->d_lock);
out:
if (last)
dput(last);
@@ -336,7 +340,10 @@ more:
if (req->r_reply_info.dir_end) {
kfree(fi->last_name);
fi->last_name = NULL;
- fi->next_offset = 2;
+ if (ceph_frag_is_rightmost(frag))
+ fi->next_offset = 2;
+ else
+ fi->next_offset = 0;
} else {
rinfo = &req->r_reply_info;
err = note_last_dentry(fi,
@@ -355,18 +362,22 @@ more:
u64 pos = ceph_make_fpos(frag, off);
struct ceph_mds_reply_inode *in =
rinfo->dir_in[off - fi->offset].in;
+ struct ceph_vino vino;
+ ino_t ino;
+
dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n",
off, off - fi->offset, rinfo->dir_nr, pos,
rinfo->dir_dname_len[off - fi->offset],
rinfo->dir_dname[off - fi->offset], in);
BUG_ON(!in);
ftype = le32_to_cpu(in->mode) >> 12;
+ vino.ino = le64_to_cpu(in->ino);
+ vino.snap = le64_to_cpu(in->snapid);
+ ino = ceph_vino_to_ino(vino);
if (filldir(dirent,
rinfo->dir_dname[off - fi->offset],
rinfo->dir_dname_len[off - fi->offset],
- pos,
- le64_to_cpu(in->ino),
- ftype) < 0) {
+ pos, ino, ftype) < 0) {
dout("filldir stopping us...\n");
return 0;
}
@@ -414,6 +425,7 @@ static void reset_readdir(struct ceph_file_info *fi)
fi->last_readdir = NULL;
}
kfree(fi->last_name);
+ fi->last_name = NULL;
fi->next_offset = 2; /* compensate for . and .. */
if (fi->dentry) {
dput(fi->dentry);
@@ -978,7 +990,12 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
*/
static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- struct inode *dir = dentry->d_parent->d_inode;
+ struct inode *dir;
+
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ dir = dentry->d_parent->d_inode;
dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
@@ -1207,6 +1224,26 @@ void ceph_dentry_lru_del(struct dentry *dn)
}
}
+/*
+ * Return name hash for a given dentry. This is dependent on
+ * the parent directory's hash function.
+ */
+unsigned ceph_dentry_hash(struct dentry *dn)
+{
+ struct inode *dir = dn->d_parent->d_inode;
+ struct ceph_inode_info *dci = ceph_inode(dir);
+
+ switch (dci->i_dir_layout.dl_dir_hash) {
+ case 0: /* for backward compat */
+ case CEPH_STR_HASH_LINUX:
+ return dn->d_name.hash;
+
+ default:
+ return ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
+ dn->d_name.name, dn->d_name.len);
+ }
+}
+
const struct file_operations ceph_dir_fops = {
.read = ceph_read_dir,
.readdir = ceph_readdir,
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 2297d94..e410561 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -59,7 +59,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
dout("encode_fh %p connectable\n", dentry);
cfh->ino = ceph_ino(dentry->d_inode);
cfh->parent_ino = ceph_ino(parent->d_inode);
- cfh->parent_name_hash = parent->d_name.hash;
+ cfh->parent_name_hash = ceph_dentry_hash(parent);
*max_len = connected_handle_length;
type = 2;
} else if (*max_len >= handle_length) {
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index e77c28c..7d0e4a8 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -154,11 +154,13 @@ int ceph_open(struct inode *inode, struct file *file)
}
/*
- * No need to block if we have any caps. Update wanted set
+ * No need to block if we have caps on the auth MDS (for
+ * write) or any MDS (for read). Update wanted set
* asynchronously.
*/
spin_lock(&inode->i_lock);
- if (__ceph_is_any_real_caps(ci)) {
+ if (__ceph_is_any_real_caps(ci) &&
+ (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
int mds_wanted = __ceph_caps_mds_wanted(ci);
int issued = __ceph_caps_issued(ci, NULL);
@@ -280,11 +282,13 @@ int ceph_release(struct inode *inode, struct file *file)
static int striped_read(struct inode *inode,
u64 off, u64 len,
struct page **pages, int num_pages,
- int *checkeof)
+ int *checkeof, bool align_to_pages,
+ unsigned long buf_align)
{
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
u64 pos, this_len;
+ int io_align, page_align;
int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */
int left, pages_left;
int read;
@@ -300,14 +304,19 @@ static int striped_read(struct inode *inode,
page_pos = pages;
pages_left = num_pages;
read = 0;
+ io_align = off & ~PAGE_MASK;
more:
+ if (align_to_pages)
+ page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
+ else
+ page_align = pos & ~PAGE_MASK;
this_len = left;
ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
&ci->i_layout, pos, &this_len,
ci->i_truncate_seq,
ci->i_truncate_size,
- page_pos, pages_left);
+ page_pos, pages_left, page_align);
hit_stripe = this_len < left;
was_short = ret >= 0 && ret < this_len;
if (ret == -ENOENT)
@@ -368,32 +377,34 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
struct inode *inode = file->f_dentry->d_inode;
struct page **pages;
u64 off = *poff;
- int num_pages = calc_pages_for(off, len);
- int ret;
+ int num_pages, ret;
dout("sync_read on file %p %llu~%u %s\n", file, off, len,
(file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
if (file->f_flags & O_DIRECT) {
- pages = ceph_get_direct_page_vector(data, num_pages, off, len);
-
- /*
- * flush any page cache pages in this range. this
- * will make concurrent normal and O_DIRECT io slow,
- * but it will at least behave sensibly when they are
- * in sequence.
- */
+ num_pages = calc_pages_for((unsigned long)data, len);
+ pages = ceph_get_direct_page_vector(data, num_pages, true);
} else {
+ num_pages = calc_pages_for(off, len);
pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
}
if (IS_ERR(pages))
return PTR_ERR(pages);
+ /*
+ * flush any page cache pages in this range. this
+ * will make concurrent normal and sync io slow,
+ * but it will at least behave sensibly when they are
+ * in sequence.
+ */
ret = filemap_write_and_wait(inode->i_mapping);
if (ret < 0)
goto done;
- ret = striped_read(inode, off, len, pages, num_pages, checkeof);
+ ret = striped_read(inode, off, len, pages, num_pages, checkeof,
+ file->f_flags & O_DIRECT,
+ (unsigned long)data & ~PAGE_MASK);
if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
@@ -402,7 +413,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
done:
if (file->f_flags & O_DIRECT)
- ceph_put_page_vector(pages, num_pages);
+ ceph_put_page_vector(pages, num_pages, true);
else
ceph_release_page_vector(pages, num_pages);
dout("sync_read result %d\n", ret);
@@ -448,6 +459,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
int flags;
int do_sync = 0;
int check_caps = 0;
+ int page_align, io_align;
+ unsigned long buf_align;
int ret;
struct timespec mtime = CURRENT_TIME;
@@ -462,6 +475,9 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
else
pos = *offset;
+ io_align = pos & ~PAGE_MASK;
+ buf_align = (unsigned long)data & ~PAGE_MASK;
+
ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
if (ret < 0)
return ret;
@@ -486,20 +502,27 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
*/
more:
len = left;
+ if (file->f_flags & O_DIRECT) {
+ /* write from beginning of first page, regardless of
+ io alignment */
+ page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
+ num_pages = calc_pages_for((unsigned long)data, len);
+ } else {
+ page_align = pos & ~PAGE_MASK;
+ num_pages = calc_pages_for(pos, len);
+ }
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
ceph_vino(inode), pos, &len,
CEPH_OSD_OP_WRITE, flags,
ci->i_snap_realm->cached_context,
do_sync,
ci->i_truncate_seq, ci->i_truncate_size,
- &mtime, false, 2);
+ &mtime, false, 2, page_align);
if (!req)
return -ENOMEM;
- num_pages = calc_pages_for(pos, len);
-
if (file->f_flags & O_DIRECT) {
- pages = ceph_get_direct_page_vector(data, num_pages, pos, len);
+ pages = ceph_get_direct_page_vector(data, num_pages, false);
if (IS_ERR(pages)) {
ret = PTR_ERR(pages);
goto out;
@@ -549,7 +572,7 @@ more:
}
if (file->f_flags & O_DIRECT)
- ceph_put_page_vector(pages, num_pages);
+ ceph_put_page_vector(pages, num_pages, false);
else if (file->f_flags & O_SYNC)
ceph_release_page_vector(pages, num_pages);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 1d6a45b..e835eff 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -2,7 +2,6 @@
#include <linux/module.h>
#include <linux/fs.h>
-#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/uaccess.h>
@@ -298,6 +297,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
ci->i_release_count = 0;
ci->i_symlink = NULL;
+ memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
+
ci->i_fragtree = RB_ROOT;
mutex_init(&ci->i_fragtree_mutex);
@@ -369,6 +370,15 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
return &ci->vfs_inode;
}
+static void ceph_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ceph_inode_cachep, ci);
+}
+
void ceph_destroy_inode(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
@@ -408,7 +418,7 @@ void ceph_destroy_inode(struct inode *inode)
if (ci->i_xattrs.prealloc_blob)
ceph_buffer_put(ci->i_xattrs.prealloc_blob);
- kmem_cache_free(ceph_inode_cachep, ci);
+ call_rcu(&inode->i_rcu, ceph_i_callback);
}
@@ -471,7 +481,9 @@ void ceph_fill_file_time(struct inode *inode, int issued,
if (issued & (CEPH_CAP_FILE_EXCL|
CEPH_CAP_FILE_WR|
- CEPH_CAP_FILE_BUFFER)) {
+ CEPH_CAP_FILE_BUFFER|
+ CEPH_CAP_AUTH_EXCL|
+ CEPH_CAP_XATTR_EXCL)) {
if (timespec_compare(ctime, &inode->i_ctime) > 0) {
dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n",
inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
@@ -511,7 +523,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
warn = 1;
}
} else {
- /* we have no write caps; whatever the MDS says is true */
+ /* we have no write|excl caps; whatever the MDS says is true */
if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) {
inode->i_ctime = *ctime;
inode->i_mtime = *mtime;
@@ -567,12 +579,17 @@ static int fill_inode(struct inode *inode,
/*
* provided version will be odd if inode value is projected,
- * even if stable. skip the update if we have a newer info
- * (e.g., due to inode info racing form multiple MDSs), or if
- * we are getting projected (unstable) inode info.
+ * even if stable. skip the update if we have newer stable
+ * info (ours>=theirs, e.g. due to racing mds replies), unless
+ * we are getting projected (unstable) info (in which case the
+ * version is odd, and we want ours>theirs).
+ * us them
+ * 2 2 skip
+ * 3 2 skip
+ * 3 3 update
*/
if (le64_to_cpu(info->version) > 0 &&
- (ci->i_version & ~1) > le64_to_cpu(info->version))
+ (ci->i_version & ~1) >= le64_to_cpu(info->version))
goto no_change;
issued = __ceph_caps_issued(ci, &implemented);
@@ -606,7 +623,14 @@ static int fill_inode(struct inode *inode,
le32_to_cpu(info->time_warp_seq),
&ctime, &mtime, &atime);
- ci->i_max_size = le64_to_cpu(info->max_size);
+ /* only update max_size on auth cap */
+ if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
+ ci->i_max_size != le64_to_cpu(info->max_size)) {
+ dout("max_size %lld -> %llu\n", ci->i_max_size,
+ le64_to_cpu(info->max_size));
+ ci->i_max_size = le64_to_cpu(info->max_size);
+ }
+
ci->i_layout = info->layout;
inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
@@ -667,6 +691,8 @@ static int fill_inode(struct inode *inode,
inode->i_op = &ceph_dir_iops;
inode->i_fop = &ceph_dir_fops;
+ ci->i_dir_layout = iinfo->dir_layout;
+
ci->i_files = le64_to_cpu(info->files);
ci->i_subdirs = le64_to_cpu(info->subdirs);
ci->i_rbytes = le64_to_cpu(info->rbytes);
@@ -828,13 +854,13 @@ static void ceph_set_dentry_offset(struct dentry *dn)
di->offset = ceph_inode(inode)->i_max_offset++;
spin_unlock(&inode->i_lock);
- spin_lock(&dcache_lock);
- spin_lock(&dn->d_lock);
+ spin_lock(&dir->d_lock);
+ spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
list_move(&dn->d_u.d_child, &dir->d_subdirs);
dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
dn->d_u.d_child.prev, dn->d_u.d_child.next);
spin_unlock(&dn->d_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&dir->d_lock);
}
/*
@@ -866,8 +892,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
} else if (realdn) {
dout("dn %p (%d) spliced with %p (%d) "
"inode %p ino %llx.%llx\n",
- dn, atomic_read(&dn->d_count),
- realdn, atomic_read(&realdn->d_count),
+ dn, dn->d_count,
+ realdn, realdn->d_count,
realdn->d_inode, ceph_vinop(realdn->d_inode));
dput(dn);
dn = realdn;
@@ -1055,7 +1081,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
ininfo = rinfo->targeti.in;
vino.ino = le64_to_cpu(ininfo->ino);
vino.snap = le64_to_cpu(ininfo->snapid);
- if (!dn->d_inode) {
+ in = dn->d_inode;
+ if (!in) {
in = ceph_get_inode(sb, vino);
if (IS_ERR(in)) {
pr_err("fill_trace bad get_inode "
@@ -1217,11 +1244,11 @@ retry_lookup:
goto retry_lookup;
} else {
/* reorder parent's d_subdirs */
- spin_lock(&dcache_lock);
- spin_lock(&dn->d_lock);
+ spin_lock(&parent->d_lock);
+ spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
list_move(&dn->d_u.d_child, &parent->d_subdirs);
spin_unlock(&dn->d_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&parent->d_lock);
}
di = dn->d_fsdata;
@@ -1386,11 +1413,8 @@ static void ceph_invalidate_work(struct work_struct *work)
spin_lock(&inode->i_lock);
dout("invalidate_pages %p gen %d revoking %d\n", inode,
ci->i_rdcache_gen, ci->i_rdcache_revoking);
- if (ci->i_rdcache_gen == 0 ||
- ci->i_rdcache_revoking != ci->i_rdcache_gen) {
- BUG_ON(ci->i_rdcache_revoking > ci->i_rdcache_gen);
+ if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
/* nevermind! */
- ci->i_rdcache_revoking = 0;
spin_unlock(&inode->i_lock);
goto out;
}
@@ -1400,15 +1424,16 @@ static void ceph_invalidate_work(struct work_struct *work)
ceph_invalidate_nondirty_pages(inode->i_mapping);
spin_lock(&inode->i_lock);
- if (orig_gen == ci->i_rdcache_gen) {
+ if (orig_gen == ci->i_rdcache_gen &&
+ orig_gen == ci->i_rdcache_revoking) {
dout("invalidate_pages %p gen %d successful\n", inode,
ci->i_rdcache_gen);
- ci->i_rdcache_gen = 0;
- ci->i_rdcache_revoking = 0;
+ ci->i_rdcache_revoking--;
check = 1;
} else {
- dout("invalidate_pages %p gen %d raced, gen now %d\n",
- inode, orig_gen, ci->i_rdcache_gen);
+ dout("invalidate_pages %p gen %d raced, now %d revoking %d\n",
+ inode, orig_gen, ci->i_rdcache_gen,
+ ci->i_rdcache_revoking);
}
spin_unlock(&inode->i_lock);
@@ -1739,7 +1764,7 @@ int ceph_do_getattr(struct inode *inode, int mask)
return 0;
}
- dout("do_getattr inode %p mask %s\n", inode, ceph_cap_string(mask));
+ dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode);
if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
return 0;
@@ -1760,12 +1785,17 @@ int ceph_do_getattr(struct inode *inode, int mask)
* Check inode permissions. We verify we have a valid value for
* the AUTH cap, then call the generic handler.
*/
-int ceph_permission(struct inode *inode, int mask)
+int ceph_permission(struct inode *inode, int mask, unsigned int flags)
{
- int err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);
+ int err;
+
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
+ err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);
if (!err)
- err = generic_permission(inode, mask, NULL);
+ err = generic_permission(inode, mask, flags, NULL);
return err;
}
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h
index a6ce54e..52e8fd7 100644
--- a/fs/ceph/ioctl.h
+++ b/fs/ceph/ioctl.h
@@ -4,7 +4,7 @@
#include <linux/ioctl.h>
#include <linux/types.h>
-#define CEPH_IOCTL_MAGIC 0x98
+#define CEPH_IOCTL_MAGIC 0x97
/* just use u64 to align sanely on all archs */
struct ceph_ioctl_layout {
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 40abde9..476b329 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -11,40 +11,68 @@
* Implement fcntl and flock locking functions.
*/
static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
- u64 pid, u64 pid_ns,
- int cmd, u64 start, u64 length, u8 wait)
+ int cmd, u8 wait, struct file_lock *fl)
{
struct inode *inode = file->f_dentry->d_inode;
struct ceph_mds_client *mdsc =
ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_mds_request *req;
int err;
+ u64 length = 0;
req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
req->r_inode = igrab(inode);
+ /* mds requires start and length rather than start and end */
+ if (LLONG_MAX == fl->fl_end)
+ length = 0;
+ else
+ length = fl->fl_end - fl->fl_start + 1;
+
dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
"length: %llu, wait: %d, type`: %d", (int)lock_type,
- (int)operation, pid, start, length, wait, cmd);
+ (int)operation, (u64)fl->fl_pid, fl->fl_start,
+ length, wait, fl->fl_type);
+
req->r_args.filelock_change.rule = lock_type;
req->r_args.filelock_change.type = cmd;
- req->r_args.filelock_change.pid = cpu_to_le64(pid);
+ req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
/* This should be adjusted, but I'm not sure if
namespaces actually get id numbers*/
req->r_args.filelock_change.pid_namespace =
- cpu_to_le64((u64)pid_ns);
- req->r_args.filelock_change.start = cpu_to_le64(start);
+ cpu_to_le64((u64)(unsigned long)fl->fl_nspid);
+ req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
req->r_args.filelock_change.length = cpu_to_le64(length);
req->r_args.filelock_change.wait = wait;
err = ceph_mdsc_do_request(mdsc, inode, req);
+
+ if ( operation == CEPH_MDS_OP_GETFILELOCK){
+ fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
+ if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
+ fl->fl_type = F_RDLCK;
+ else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
+ fl->fl_type = F_WRLCK;
+ else
+ fl->fl_type = F_UNLCK;
+
+ fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
+ length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
+ le64_to_cpu(req->r_reply_info.filelock_reply->length);
+ if (length >= 1)
+ fl->fl_end = length -1;
+ else
+ fl->fl_end = 0;
+
+ }
ceph_mdsc_put_request(req);
dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
- "length: %llu, wait: %d, type`: %d err code %d", (int)lock_type,
- (int)operation, pid, start, length, wait, cmd, err);
+ "length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type,
+ (int)operation, (u64)fl->fl_pid, fl->fl_start,
+ length, wait, fl->fl_type, err);
return err;
}
@@ -54,7 +82,6 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
*/
int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
{
- u64 length;
u8 lock_cmd;
int err;
u8 wait = 0;
@@ -76,29 +103,20 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
else
lock_cmd = CEPH_LOCK_UNLOCK;
- if (LLONG_MAX == fl->fl_end)
- length = 0;
- else
- length = fl->fl_end - fl->fl_start + 1;
-
- err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
- (u64)fl->fl_pid,
- (u64)(unsigned long)fl->fl_nspid,
- lock_cmd, fl->fl_start,
- length, wait);
+ err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
if (!err) {
- dout("mds locked, locking locally");
- err = posix_lock_file(file, fl, NULL);
- if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
- /* undo! This should only happen if the kernel detects
- * local deadlock. */
- ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
- (u64)fl->fl_pid,
- (u64)(unsigned long)fl->fl_nspid,
- CEPH_LOCK_UNLOCK, fl->fl_start,
- length, 0);
- dout("got %d on posix_lock_file, undid lock", err);
+ if ( op != CEPH_MDS_OP_GETFILELOCK ){
+ dout("mds locked, locking locally");
+ err = posix_lock_file(file, fl, NULL);
+ if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
+ /* undo! This should only happen if the kernel detects
+ * local deadlock. */
+ ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
+ CEPH_LOCK_UNLOCK, 0, fl);
+ dout("got %d on posix_lock_file, undid lock", err);
+ }
}
+
} else {
dout("mds returned error code %d", err);
}
@@ -107,7 +125,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
{
- u64 length;
u8 lock_cmd;
int err;
u8 wait = 1;
@@ -127,26 +144,15 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
lock_cmd = CEPH_LOCK_EXCL;
else
lock_cmd = CEPH_LOCK_UNLOCK;
- /* mds requires start and length rather than start and end */
- if (LLONG_MAX == fl->fl_end)
- length = 0;
- else
- length = fl->fl_end - fl->fl_start + 1;
err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
- file, (u64)fl->fl_pid,
- (u64)(unsigned long)fl->fl_nspid,
- lock_cmd, fl->fl_start,
- length, wait);
+ file, lock_cmd, wait, fl);
if (!err) {
err = flock_lock_file_wait(file, fl);
if (err) {
ceph_lock_message(CEPH_LOCK_FLOCK,
CEPH_MDS_OP_SETFILELOCK,
- file, (u64)fl->fl_pid,
- (u64)(unsigned long)fl->fl_nspid,
- CEPH_LOCK_UNLOCK, fl->fl_start,
- length, 0);
+ file, CEPH_LOCK_UNLOCK, 0, fl);
dout("got %d on flock_lock_file_wait, undid lock", err);
}
} else {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 3142b15..1e30d19 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -6,7 +6,6 @@
#include <linux/sched.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
-#include <linux/smp_lock.h>
#include "super.h"
#include "mds_client.h"
@@ -61,7 +60,8 @@ static const struct ceph_connection_operations mds_con_ops;
* parse individual inode info
*/
static int parse_reply_info_in(void **p, void *end,
- struct ceph_mds_reply_info_in *info)
+ struct ceph_mds_reply_info_in *info,
+ int features)
{
int err = -EIO;
@@ -75,6 +75,12 @@ static int parse_reply_info_in(void **p, void *end,
info->symlink = *p;
*p += info->symlink_len;
+ if (features & CEPH_FEATURE_DIRLAYOUTHASH)
+ ceph_decode_copy_safe(p, end, &info->dir_layout,
+ sizeof(info->dir_layout), bad);
+ else
+ memset(&info->dir_layout, 0, sizeof(info->dir_layout));
+
ceph_decode_32_safe(p, end, info->xattr_len, bad);
ceph_decode_need(p, end, info->xattr_len, bad);
info->xattr_data = *p;
@@ -89,12 +95,13 @@ bad:
* target inode.
*/
static int parse_reply_info_trace(void **p, void *end,
- struct ceph_mds_reply_info_parsed *info)
+ struct ceph_mds_reply_info_parsed *info,
+ int features)
{
int err;
if (info->head->is_dentry) {
- err = parse_reply_info_in(p, end, &info->diri);
+ err = parse_reply_info_in(p, end, &info->diri, features);
if (err < 0)
goto out_bad;
@@ -115,7 +122,7 @@ static int parse_reply_info_trace(void **p, void *end,
}
if (info->head->is_target) {
- err = parse_reply_info_in(p, end, &info->targeti);
+ err = parse_reply_info_in(p, end, &info->targeti, features);
if (err < 0)
goto out_bad;
}
@@ -135,7 +142,8 @@ out_bad:
* parse readdir results
*/
static int parse_reply_info_dir(void **p, void *end,
- struct ceph_mds_reply_info_parsed *info)
+ struct ceph_mds_reply_info_parsed *info,
+ int features)
{
u32 num, i = 0;
int err;
@@ -183,7 +191,7 @@ static int parse_reply_info_dir(void **p, void *end,
*p += sizeof(struct ceph_mds_reply_lease);
/* inode */
- err = parse_reply_info_in(p, end, &info->dir_in[i]);
+ err = parse_reply_info_in(p, end, &info->dir_in[i], features);
if (err < 0)
goto out_bad;
i++;
@@ -203,10 +211,45 @@ out_bad:
}
/*
+ * parse fcntl F_GETLK results
+ */
+static int parse_reply_info_filelock(void **p, void *end,
+ struct ceph_mds_reply_info_parsed *info,
+ int features)
+{
+ if (*p + sizeof(*info->filelock_reply) > end)
+ goto bad;
+
+ info->filelock_reply = *p;
+ *p += sizeof(*info->filelock_reply);
+
+ if (unlikely(*p != end))
+ goto bad;
+ return 0;
+
+bad:
+ return -EIO;
+}
+
+/*
+ * parse extra results
+ */
+static int parse_reply_info_extra(void **p, void *end,
+ struct ceph_mds_reply_info_parsed *info,
+ int features)
+{
+ if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
+ return parse_reply_info_filelock(p, end, info, features);
+ else
+ return parse_reply_info_dir(p, end, info, features);
+}
+
+/*
* parse entire mds reply
*/
static int parse_reply_info(struct ceph_msg *msg,
- struct ceph_mds_reply_info_parsed *info)
+ struct ceph_mds_reply_info_parsed *info,
+ int features)
{
void *p, *end;
u32 len;
@@ -219,15 +262,15 @@ static int parse_reply_info(struct ceph_msg *msg,
/* trace */
ceph_decode_32_safe(&p, end, len, bad);
if (len > 0) {
- err = parse_reply_info_trace(&p, p+len, info);
+ err = parse_reply_info_trace(&p, p+len, info, features);
if (err < 0)
goto out_bad;
}
- /* dir content */
+ /* extra */
ceph_decode_32_safe(&p, end, len, bad);
if (len > 0) {
- err = parse_reply_info_dir(&p, p+len, info);
+ err = parse_reply_info_extra(&p, p+len, info, features);
if (err < 0)
goto out_bad;
}
@@ -529,6 +572,9 @@ static void __register_request(struct ceph_mds_client *mdsc,
ceph_mdsc_get_request(req);
__insert_request(mdsc, req);
+ req->r_uid = current_fsuid();
+ req->r_gid = current_fsgid();
+
if (dir) {
struct ceph_inode_info *ci = ceph_inode(dir);
@@ -620,7 +666,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
} else {
/* dir + name */
inode = dir;
- hash = req->r_dentry->d_name.hash;
+ hash = ceph_dentry_hash(req->r_dentry);
is_hash = true;
}
}
@@ -1452,7 +1498,7 @@ retry:
*base = ceph_ino(temp->d_inode);
*plen = len;
dout("build_path on %p %d built %llx '%.*s'\n",
- dentry, atomic_read(&dentry->d_count), *base, len, path);
+ dentry, dentry->d_count, *base, len, path);
return path;
}
@@ -1588,8 +1634,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
head->op = cpu_to_le32(req->r_op);
- head->caller_uid = cpu_to_le32(current_fsuid());
- head->caller_gid = cpu_to_le32(current_fsgid());
+ head->caller_uid = cpu_to_le32(req->r_uid);
+ head->caller_gid = cpu_to_le32(req->r_gid);
head->args = req->r_args;
ceph_encode_filepath(&p, end, ino1, path1);
@@ -1659,7 +1705,6 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
struct ceph_msg *msg;
int flags = 0;
- req->r_mds = mds;
req->r_attempts++;
if (req->r_inode) {
struct ceph_cap *cap =
@@ -1746,6 +1791,8 @@ static int __do_request(struct ceph_mds_client *mdsc,
goto finish;
}
+ put_request_session(req);
+
mds = __choose_mds(mdsc, req);
if (mds < 0 ||
ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
@@ -1763,6 +1810,8 @@ static int __do_request(struct ceph_mds_client *mdsc,
goto finish;
}
}
+ req->r_session = get_session(session);
+
dout("do_request mds%d session %p state %s\n", mds, session,
session_state_name(session->s_state));
if (session->s_state != CEPH_MDS_SESSION_OPEN &&
@@ -1775,7 +1824,6 @@ static int __do_request(struct ceph_mds_client *mdsc,
}
/* send request */
- req->r_session = get_session(session);
req->r_resend_mds = -1; /* forget any previous mds hint */
if (req->r_request_started == 0) /* note request start time */
@@ -1829,7 +1877,6 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds)
if (req->r_session &&
req->r_session->s_mds == mds) {
dout(" kicking tid %llu\n", req->r_tid);
- put_request_session(req);
__do_request(mdsc, req);
}
}
@@ -2022,8 +2069,11 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
goto out;
} else {
struct ceph_inode_info *ci = ceph_inode(req->r_inode);
- struct ceph_cap *cap =
- ceph_get_cap_for_mds(ci, req->r_mds);;
+ struct ceph_cap *cap = NULL;
+
+ if (req->r_session)
+ cap = ceph_get_cap_for_mds(ci,
+ req->r_session->s_mds);
dout("already using auth");
if ((!cap || cap != ci->i_auth_cap) ||
@@ -2067,12 +2117,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
dout("handle_reply tid %lld result %d\n", tid, result);
rinfo = &req->r_reply_info;
- err = parse_reply_info(msg, rinfo);
+ err = parse_reply_info(msg, rinfo, session->s_con.peer_features);
mutex_unlock(&mdsc->mutex);
mutex_lock(&session->s_mutex);
if (err < 0) {
- pr_err("mdsc_handle_reply got corrupt reply mds%d\n", mds);
+ pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
ceph_msg_dump(msg);
goto out_err;
}
@@ -2092,7 +2142,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
mutex_lock(&req->r_fill_mutex);
err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
if (err == 0) {
- if (result == 0 && rinfo->dir_nr)
+ if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK &&
+ rinfo->dir_nr)
ceph_readdir_prepopulate(req, req->r_session);
ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index d66d63c..4e3a9cc 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -35,6 +35,7 @@ struct ceph_cap;
*/
struct ceph_mds_reply_info_in {
struct ceph_mds_reply_inode *in;
+ struct ceph_dir_layout dir_layout;
u32 symlink_len;
char *symlink;
u32 xattr_len;
@@ -42,26 +43,37 @@ struct ceph_mds_reply_info_in {
};
/*
- * parsed info about an mds reply, including information about the
- * target inode and/or its parent directory and dentry, and directory
- * contents (for readdir results).
+ * parsed info about an mds reply, including information about
+ * either: 1) the target inode and/or its parent directory and dentry,
+ * and directory contents (for readdir results), or
+ * 2) the file range lock info (for fcntl F_GETLK results).
*/
struct ceph_mds_reply_info_parsed {
struct ceph_mds_reply_head *head;
+ /* trace */
struct ceph_mds_reply_info_in diri, targeti;
struct ceph_mds_reply_dirfrag *dirfrag;
char *dname;
u32 dname_len;
struct ceph_mds_reply_lease *dlease;
- struct ceph_mds_reply_dirfrag *dir_dir;
- int dir_nr;
- char **dir_dname;
- u32 *dir_dname_len;
- struct ceph_mds_reply_lease **dir_dlease;
- struct ceph_mds_reply_info_in *dir_in;
- u8 dir_complete, dir_end;
+ /* extra */
+ union {
+ /* for fcntl F_GETLK results */
+ struct ceph_filelock *filelock_reply;
+
+ /* for readdir results */
+ struct {
+ struct ceph_mds_reply_dirfrag *dir_dir;
+ int dir_nr;
+ char **dir_dname;
+ u32 *dir_dname_len;
+ struct ceph_mds_reply_lease **dir_dlease;
+ struct ceph_mds_reply_info_in *dir_in;
+ u8 dir_complete, dir_end;
+ };
+ };
/* encoded blob describing snapshot contexts for certain
operations (e.g., open) */
@@ -154,7 +166,6 @@ struct ceph_mds_request {
struct ceph_mds_client *r_mdsc;
int r_op; /* mds op code */
- int r_mds;
/* operation on what? */
struct inode *r_inode; /* arg1 */
@@ -170,6 +181,8 @@ struct ceph_mds_request {
union ceph_mds_request_args r_args;
int r_fmode; /* file mode, if expecting cap */
+ uid_t r_uid;
+ gid_t r_gid;
/* for choosing which mds to send this request to */
int r_direct_mode;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index d6e0e04..bf6f0f3 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -428,7 +428,8 @@ struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
goto fail;
}
fsc->client->extra_mon_dispatch = extra_mon_dispatch;
- fsc->client->supported_features |= CEPH_FEATURE_FLOCK;
+ fsc->client->supported_features |= CEPH_FEATURE_FLOCK |
+ CEPH_FEATURE_DIRLAYOUTHASH;
fsc->client->monc.want_mdsmap = 1;
fsc->mount_options = fsopt;
@@ -443,13 +444,17 @@ struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
goto fail_client;
err = -ENOMEM;
- fsc->wb_wq = create_workqueue("ceph-writeback");
+ /*
+ * The number of concurrent works can be high but they don't need
+ * to be processed in parallel, limit concurrency.
+ */
+ fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
if (fsc->wb_wq == NULL)
goto fail_bdi;
- fsc->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid");
+ fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
if (fsc->pg_inv_wq == NULL)
goto fail_wb_wq;
- fsc->trunc_wq = create_singlethread_workqueue("ceph-trunc");
+ fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1);
if (fsc->trunc_wq == NULL)
goto fail_pg_inv_wq;
@@ -635,7 +640,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
/*
* mount: join the ceph cluster, and open root directory.
*/
-static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt,
+static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
const char *path)
{
int err;
@@ -678,16 +683,14 @@ static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt,
}
}
- mnt->mnt_root = root;
- mnt->mnt_sb = fsc->sb;
-
fsc->mount_state = CEPH_MOUNT_MOUNTED;
dout("mount success\n");
- err = 0;
+ mutex_unlock(&fsc->client->mount_mutex);
+ return root;
out:
mutex_unlock(&fsc->client->mount_mutex);
- return err;
+ return ERR_PTR(err);
fail:
if (first) {
@@ -777,41 +780,45 @@ static int ceph_register_bdi(struct super_block *sb,
return err;
}
-static int ceph_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data,
- struct vfsmount *mnt)
+static struct dentry *ceph_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
struct super_block *sb;
struct ceph_fs_client *fsc;
+ struct dentry *res;
int err;
int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
const char *path = NULL;
struct ceph_mount_options *fsopt = NULL;
struct ceph_options *opt = NULL;
- dout("ceph_get_sb\n");
+ dout("ceph_mount\n");
err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
- if (err < 0)
+ if (err < 0) {
+ res = ERR_PTR(err);
goto out_final;
+ }
/* create client (which we may/may not use) */
fsc = create_fs_client(fsopt, opt);
if (IS_ERR(fsc)) {
- err = PTR_ERR(fsc);
+ res = ERR_CAST(fsc);
kfree(fsopt);
kfree(opt);
goto out_final;
}
err = ceph_mdsc_init(fsc);
- if (err < 0)
+ if (err < 0) {
+ res = ERR_PTR(err);
goto out;
+ }
if (ceph_test_opt(fsc->client, NOSHARE))
compare_super = NULL;
sb = sget(fs_type, compare_super, ceph_set_super, fsc);
if (IS_ERR(sb)) {
- err = PTR_ERR(sb);
+ res = ERR_CAST(sb);
goto out;
}
@@ -823,16 +830,18 @@ static int ceph_get_sb(struct file_system_type *fs_type,
} else {
dout("get_sb using new client %p\n", fsc);
err = ceph_register_bdi(sb, fsc);
- if (err < 0)
+ if (err < 0) {
+ res = ERR_PTR(err);
goto out_splat;
+ }
}
- err = ceph_mount(fsc, mnt, path);
- if (err < 0)
+ res = ceph_real_mount(fsc, path);
+ if (IS_ERR(res))
goto out_splat;
- dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root,
- mnt->mnt_root->d_inode, ceph_vinop(mnt->mnt_root->d_inode));
- return 0;
+ dout("root %p inode %p ino %llx.%llx\n", res,
+ res->d_inode, ceph_vinop(res->d_inode));
+ return res;
out_splat:
ceph_mdsc_close_sessions(fsc->mdsc);
@@ -843,8 +852,8 @@ out:
ceph_mdsc_destroy(fsc);
destroy_fs_client(fsc);
out_final:
- dout("ceph_get_sb fail %d\n", err);
- return err;
+ dout("ceph_mount fail %ld\n", PTR_ERR(res));
+ return res;
}
static void ceph_kill_sb(struct super_block *s)
@@ -860,7 +869,7 @@ static void ceph_kill_sb(struct super_block *s)
static struct file_system_type ceph_fs_type = {
.owner = THIS_MODULE,
.name = "ceph",
- .get_sb = ceph_get_sb,
+ .mount = ceph_mount,
.kill_sb = ceph_kill_sb,
.fs_flags = FS_RENAME_DOES_D_MOVE,
};
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 1886294..20b907d 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -239,6 +239,7 @@ struct ceph_inode_info {
unsigned i_ceph_flags;
unsigned long i_release_count;
+ struct ceph_dir_layout i_dir_layout;
struct ceph_file_layout i_layout;
char *i_symlink;
@@ -293,9 +294,7 @@ struct ceph_inode_info {
int i_rd_ref, i_rdcache_ref, i_wr_ref;
int i_wrbuffer_ref, i_wrbuffer_ref_head;
u32 i_shared_gen; /* increment each time we get FILE_SHARED */
- u32 i_rdcache_gen; /* we increment this each time we get
- FILE_CACHE. If it's non-zero, we
- _may_ have cached pages. */
+ u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */
u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
struct list_head i_unsafe_writes; /* uncommitted sync writes */
@@ -667,7 +666,7 @@ extern void ceph_queue_invalidate(struct inode *inode);
extern void ceph_queue_writeback(struct inode *inode);
extern int ceph_do_getattr(struct inode *inode, int mask);
-extern int ceph_permission(struct inode *inode, int mask);
+extern int ceph_permission(struct inode *inode, int mask, unsigned int flags);
extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
@@ -770,6 +769,7 @@ extern void ceph_dentry_lru_add(struct dentry *dn);
extern void ceph_dentry_lru_touch(struct dentry *dn);
extern void ceph_dentry_lru_del(struct dentry *dn);
extern void ceph_invalidate_dentry_lease(struct dentry *dentry);
+extern unsigned ceph_dentry_hash(struct dentry *dn);
/*
* our d_ops vary depending on whether the inode is live,
diff --git a/fs/char_dev.c b/fs/char_dev.c
index e5b9df9..6e99b9d 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -417,18 +417,6 @@ static int chrdev_open(struct inode *inode, struct file *filp)
return ret;
}
-int cdev_index(struct inode *inode)
-{
- int idx;
- struct kobject *kobj;
-
- kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
- if (!kobj)
- return -1;
- kobject_put(kobj);
- return idx;
-}
-
void cd_forget(struct inode *inode)
{
spin_lock(&cdev_lock);
@@ -582,7 +570,6 @@ EXPORT_SYMBOL(cdev_init);
EXPORT_SYMBOL(cdev_alloc);
EXPORT_SYMBOL(cdev_del);
EXPORT_SYMBOL(cdev_add);
-EXPORT_SYMBOL(cdev_index);
EXPORT_SYMBOL(__register_chrdev);
EXPORT_SYMBOL(__unregister_chrdev);
EXPORT_SYMBOL(directly_mappable_cdev_bdi);
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 917b7d4..ee45648 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -2,6 +2,10 @@ config CIFS
tristate "CIFS support (advanced network filesystem, SMBFS successor)"
depends on INET
select NLS
+ select CRYPTO
+ select CRYPTO_MD5
+ select CRYPTO_HMAC
+ select CRYPTO_ARC4
help
This is the client VFS module for the Common Internet File System
(CIFS) protocol which is the successor to the Server Message Block
@@ -140,6 +144,13 @@ config CIFS_FSCACHE
to be cached locally on disk through the general filesystem cache
manager. If unsure, say N.
+config CIFS_ACL
+ bool "Provide CIFS ACL support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && CIFS_XATTR
+ help
+ Allows to fetch CIFS/NTFS ACL from the server. The DACL blob
+ is handed over to the application/caller.
+
config CIFS_EXPERIMENTAL
bool "CIFS Experimental Features (EXPERIMENTAL)"
depends on CIFS && EXPERIMENTAL
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index adefa60..43b19dd 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -6,7 +6,9 @@ obj-$(CONFIG_CIFS) += cifs.o
cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \
md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
- readdir.o ioctl.o sess.o export.o cifsacl.o
+ readdir.o ioctl.o sess.o export.o
+
+cifs-$(CONFIG_CIFS_ACL) += cifsacl.o
cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
diff --git a/fs/cifs/README b/fs/cifs/README
index ee68d10..46af99a 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -337,6 +337,15 @@ A partial list of the supported mount options follows:
wsize default write size (default 57344)
maximum wsize currently allowed by CIFS is 57344 (fourteen
4096 byte pages)
+ actimeo=n attribute cache timeout in seconds (default 1 second).
+ After this timeout, the cifs client requests fresh attribute
+ information from the server. This option allows to tune the
+ attribute cache timeout to suit the workload needs. Shorter
+ timeouts mean better the cache coherency, but increased number
+ of calls to the server. Longer timeouts mean reduced number
+ of calls to the server at the expense of less stricter cache
+ coherency checks (i.e. incorrect attribute cache for a short
+ period of time).
rw mount the network share read-write (note that the
server may still consider the share read-only)
ro mount network share read-only
diff --git a/fs/cifs/TODO b/fs/cifs/TODO
index 5aff46c..355abcdc 100644
--- a/fs/cifs/TODO
+++ b/fs/cifs/TODO
@@ -81,7 +81,7 @@ u) DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for
v) mount check for unmatched uids
-w) Add support for new vfs entry points for setlease and fallocate
+w) Add support for new vfs entry point for fallocate
x) Fix Samba 3 server to handle Linux kernel aio so dbench with lots of
processes can proceed better in parallel (on the server)
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index 224d7bbd..e654dfd 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -64,7 +64,9 @@ static uint16_t cifs_server_get_key(const void *cookie_netfs_data,
void *buffer, uint16_t maxbuf)
{
const struct TCP_Server_Info *server = cookie_netfs_data;
- const struct sockaddr *sa = (struct sockaddr *) &server->addr.sockAddr;
+ const struct sockaddr *sa = (struct sockaddr *) &server->dstaddr;
+ const struct sockaddr_in *addr = (struct sockaddr_in *) sa;
+ const struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) sa;
struct cifs_server_key *key = buffer;
uint16_t key_len = sizeof(struct cifs_server_key);
@@ -76,16 +78,16 @@ static uint16_t cifs_server_get_key(const void *cookie_netfs_data,
*/
switch (sa->sa_family) {
case AF_INET:
- key->family = server->addr.sockAddr.sin_family;
- key->port = server->addr.sockAddr.sin_port;
- key->addr[0].ipv4_addr = server->addr.sockAddr.sin_addr;
+ key->family = sa->sa_family;
+ key->port = addr->sin_port;
+ key->addr[0].ipv4_addr = addr->sin_addr;
key_len += sizeof(key->addr[0].ipv4_addr);
break;
case AF_INET6:
- key->family = server->addr.sockAddr6.sin6_family;
- key->port = server->addr.sockAddr6.sin6_port;
- key->addr[0].ipv6_addr = server->addr.sockAddr6.sin6_addr;
+ key->family = sa->sa_family;
+ key->port = addr6->sin6_port;
+ key->addr[0].ipv6_addr = addr6->sin6_addr;
key_len += sizeof(key->addr[0].ipv6_addr);
break;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 103ab8b..ede9830 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -119,29 +119,27 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
"Display Internal CIFS Data Structures for Debugging\n"
"---------------------------------------------------\n");
seq_printf(m, "CIFS Version %s\n", CIFS_VERSION);
- seq_printf(m, "Features: ");
+ seq_printf(m, "Features:");
#ifdef CONFIG_CIFS_DFS_UPCALL
- seq_printf(m, "dfs");
- seq_putc(m, ' ');
+ seq_printf(m, " dfs");
#endif
#ifdef CONFIG_CIFS_FSCACHE
- seq_printf(m, "fscache");
- seq_putc(m, ' ');
+ seq_printf(m, " fscache");
#endif
#ifdef CONFIG_CIFS_WEAK_PW_HASH
- seq_printf(m, "lanman");
- seq_putc(m, ' ');
+ seq_printf(m, " lanman");
#endif
#ifdef CONFIG_CIFS_POSIX
- seq_printf(m, "posix");
- seq_putc(m, ' ');
+ seq_printf(m, " posix");
#endif
#ifdef CONFIG_CIFS_UPCALL
- seq_printf(m, "spnego");
- seq_putc(m, ' ');
+ seq_printf(m, " spnego");
#endif
#ifdef CONFIG_CIFS_XATTR
- seq_printf(m, "xattr");
+ seq_printf(m, " xattr");
+#endif
+#ifdef CONFIG_CIFS_ACL
+ seq_printf(m, " acl");
#endif
seq_putc(m, '\n');
seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 525ba59..7852cd6 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -15,7 +15,7 @@
* the GNU Lesser General Public License for more details.
*
*/
-#include <linux/radix-tree.h>
+#include <linux/rbtree.h>
#ifndef _CIFS_FS_SB_H
#define _CIFS_FS_SB_H
@@ -42,12 +42,13 @@
#define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */
struct cifs_sb_info {
- struct radix_tree_root tlink_tree;
-#define CIFS_TLINK_MASTER_TAG 0 /* is "master" (mount) tcon */
+ struct rb_root tlink_tree;
spinlock_t tlink_tree_lock;
+ struct tcon_link *master_tlink;
struct nls_table *local_nls;
unsigned int rsize;
unsigned int wsize;
+ unsigned long actimeo; /* attribute cache timeout (jiffies) */
atomic_t active;
uid_t mnt_uid;
gid_t mnt_gid;
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 8704490..4dfba82 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -98,6 +98,8 @@ struct key *
cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
{
struct TCP_Server_Info *server = sesInfo->server;
+ struct sockaddr_in *sa = (struct sockaddr_in *) &server->dstaddr;
+ struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) &server->dstaddr;
char *description, *dp;
size_t desc_len;
struct key *spnego_key;
@@ -127,10 +129,10 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
dp = description + strlen(description);
/* add the server address */
- if (server->addr.sockAddr.sin_family == AF_INET)
- sprintf(dp, "ip4=%pI4", &server->addr.sockAddr.sin_addr);
- else if (server->addr.sockAddr.sin_family == AF_INET6)
- sprintf(dp, "ip6=%pI6", &server->addr.sockAddr6.sin6_addr);
+ if (server->dstaddr.ss_family == AF_INET)
+ sprintf(dp, "ip4=%pI4", &sa->sin_addr);
+ else if (server->dstaddr.ss_family == AF_INET6)
+ sprintf(dp, "ip6=%pI6", &sa6->sin6_addr);
else
goto out;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index c9b4792..a437ec3 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -30,8 +30,6 @@
#include "cifs_debug.h"
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-
static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
{{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"},
{{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"},
@@ -560,7 +558,7 @@ static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb,
struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
- return NULL;
+ return ERR_CAST(tlink);
xid = GetXid();
rc = CIFSSMBGetCIFSACL(xid, tlink_tcon(tlink), fid, &pntsd, pacllen);
@@ -568,7 +566,9 @@ static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb,
cifs_put_tlink(tlink);
- cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen);
+ cFYI(1, "%s: rc = %d ACL len %d", __func__, rc, *pacllen);
+ if (rc)
+ return ERR_PTR(rc);
return pntsd;
}
@@ -583,7 +583,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
- return NULL;
+ return ERR_CAST(tlink);
tcon = tlink_tcon(tlink);
xid = GetXid();
@@ -591,23 +591,22 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
rc = CIFSSMBOpen(xid, tcon, path, FILE_OPEN, READ_CONTROL, 0,
&fid, &oplock, NULL, cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
- if (rc) {
- cERROR(1, "Unable to open file to get ACL");
- goto out;
+ if (!rc) {
+ rc = CIFSSMBGetCIFSACL(xid, tcon, fid, &pntsd, pacllen);
+ CIFSSMBClose(xid, tcon, fid);
}
- rc = CIFSSMBGetCIFSACL(xid, tcon, fid, &pntsd, pacllen);
- cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen);
-
- CIFSSMBClose(xid, tcon, fid);
- out:
cifs_put_tlink(tlink);
FreeXid(xid);
+
+ cFYI(1, "%s: rc = %d ACL len %d", __func__, rc, *pacllen);
+ if (rc)
+ return ERR_PTR(rc);
return pntsd;
}
/* Retrieve an ACL from the server */
-static struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
+struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
struct inode *inode, const char *path,
u32 *pacllen)
{
@@ -695,7 +694,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
}
/* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */
-void
+int
cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
struct inode *inode, const char *path, const __u16 *pfid)
{
@@ -711,17 +710,21 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
pntsd = get_cifs_acl(cifs_sb, inode, path, &acllen);
/* if we can retrieve the ACL, now parse Access Control Entries, ACEs */
- if (pntsd)
+ if (IS_ERR(pntsd)) {
+ rc = PTR_ERR(pntsd);
+ cERROR(1, "%s: error %d getting sec desc", __func__, rc);
+ } else {
rc = parse_sec_desc(pntsd, acllen, fattr);
- if (rc)
- cFYI(1, "parse sec desc failed rc = %d", rc);
+ kfree(pntsd);
+ if (rc)
+ cERROR(1, "parse sec desc failed rc = %d", rc);
+ }
- kfree(pntsd);
- return;
+ return rc;
}
/* Convert mode bits to an ACL so we can update the ACL on the server */
-int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
+int mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode)
{
int rc = 0;
__u32 secdesclen = 0;
@@ -736,7 +739,10 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
/* Add three ACEs for owner, group, everyone getting rid of
other ACEs as chmod disables ACEs and set the security descriptor */
- if (pntsd) {
+ if (IS_ERR(pntsd)) {
+ rc = PTR_ERR(pntsd);
+ cERROR(1, "%s: error %d getting sec desc", __func__, rc);
+ } else {
/* allocate memory for the smb header,
set security descriptor request security descriptor
parameters, and secuirty descriptor itself */
@@ -766,4 +772,3 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
return rc;
}
-#endif /* CONFIG_CIFS_EXPERIMENTAL */
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index 6c8096c..c4ae7d0 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -74,11 +74,7 @@ struct cifs_wksid {
char sidname[SIDNAMELENGTH];
} __attribute__((packed));
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-
extern int match_sid(struct cifs_sid *);
extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *);
-#endif /* CONFIG_CIFS_EXPERIMENTAL */
-
#endif /* _CIFSACL_H */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 7ac0056..66f3d50 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -43,21 +43,36 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
unsigned char *p24);
static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
- const struct session_key *key, char *signature)
+ struct TCP_Server_Info *server, char *signature)
{
- struct MD5Context context;
+ int rc;
- if ((cifs_pdu == NULL) || (signature == NULL) || (key == NULL))
+ if (cifs_pdu == NULL || signature == NULL || server == NULL)
return -EINVAL;
- cifs_MD5_init(&context);
- cifs_MD5_update(&context, (char *)&key->data, key->len);
- cifs_MD5_update(&context, cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
+ if (!server->secmech.sdescmd5) {
+ cERROR(1, "%s: Can't generate signature\n", __func__);
+ return -1;
+ }
+
+ rc = crypto_shash_init(&server->secmech.sdescmd5->shash);
+ if (rc) {
+ cERROR(1, "%s: Oould not init md5\n", __func__);
+ return rc;
+ }
+
+ crypto_shash_update(&server->secmech.sdescmd5->shash,
+ server->session_key.response, server->session_key.len);
+
+ crypto_shash_update(&server->secmech.sdescmd5->shash,
+ cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
+
+ rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
- cifs_MD5_final(signature, &context);
return 0;
}
+/* must be called with server->srv_mutex held */
int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
__u32 *pexpected_response_sequence_number)
{
@@ -70,17 +85,14 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
if ((cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) == 0)
return rc;
- spin_lock(&GlobalMid_Lock);
cifs_pdu->Signature.Sequence.SequenceNumber =
cpu_to_le32(server->sequence_number);
cifs_pdu->Signature.Sequence.Reserved = 0;
*pexpected_response_sequence_number = server->sequence_number++;
server->sequence_number++;
- spin_unlock(&GlobalMid_Lock);
- rc = cifs_calculate_signature(cifs_pdu, &server->session_key,
- smb_signature);
+ rc = cifs_calculate_signature(cifs_pdu, server, smb_signature);
if (rc)
memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
else
@@ -90,16 +102,28 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
}
static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
- const struct session_key *key, char *signature)
+ struct TCP_Server_Info *server, char *signature)
{
- struct MD5Context context;
int i;
+ int rc;
- if ((iov == NULL) || (signature == NULL) || (key == NULL))
+ if (iov == NULL || signature == NULL || server == NULL)
return -EINVAL;
- cifs_MD5_init(&context);
- cifs_MD5_update(&context, (char *)&key->data, key->len);
+ if (!server->secmech.sdescmd5) {
+ cERROR(1, "%s: Can't generate signature\n", __func__);
+ return -1;
+ }
+
+ rc = crypto_shash_init(&server->secmech.sdescmd5->shash);
+ if (rc) {
+ cERROR(1, "%s: Oould not init md5\n", __func__);
+ return rc;
+ }
+
+ crypto_shash_update(&server->secmech.sdescmd5->shash,
+ server->session_key.response, server->session_key.len);
+
for (i = 0; i < n_vec; i++) {
if (iov[i].iov_len == 0)
continue;
@@ -112,18 +136,19 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
if (i == 0) {
if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
break; /* nothing to sign or corrupt header */
- cifs_MD5_update(&context, iov[0].iov_base+4,
- iov[0].iov_len-4);
+ crypto_shash_update(&server->secmech.sdescmd5->shash,
+ iov[i].iov_base + 4, iov[i].iov_len - 4);
} else
- cifs_MD5_update(&context, iov[i].iov_base, iov[i].iov_len);
+ crypto_shash_update(&server->secmech.sdescmd5->shash,
+ iov[i].iov_base, iov[i].iov_len);
}
- cifs_MD5_final(signature, &context);
+ rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
- return 0;
+ return rc;
}
-
+/* must be called with server->srv_mutex held */
int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
__u32 *pexpected_response_sequence_number)
{
@@ -137,17 +162,14 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
if ((cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) == 0)
return rc;
- spin_lock(&GlobalMid_Lock);
cifs_pdu->Signature.Sequence.SequenceNumber =
cpu_to_le32(server->sequence_number);
cifs_pdu->Signature.Sequence.Reserved = 0;
*pexpected_response_sequence_number = server->sequence_number++;
server->sequence_number++;
- spin_unlock(&GlobalMid_Lock);
- rc = cifs_calc_signature2(iov, n_vec, &server->session_key,
- smb_signature);
+ rc = cifs_calc_signature2(iov, n_vec, server, smb_signature);
if (rc)
memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
else
@@ -157,14 +179,14 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
}
int cifs_verify_signature(struct smb_hdr *cifs_pdu,
- const struct session_key *session_key,
+ struct TCP_Server_Info *server,
__u32 expected_sequence_number)
{
unsigned int rc;
char server_response_sig[8];
char what_we_think_sig_should_be[20];
- if (cifs_pdu == NULL || session_key == NULL)
+ if (cifs_pdu == NULL || server == NULL)
return -EINVAL;
if (cifs_pdu->Command == SMB_COM_NEGOTIATE)
@@ -193,7 +215,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
cpu_to_le32(expected_sequence_number);
cifs_pdu->Signature.Sequence.Reserved = 0;
- rc = cifs_calculate_signature(cifs_pdu, session_key,
+ rc = cifs_calculate_signature(cifs_pdu, server,
what_we_think_sig_should_be);
if (rc)
@@ -209,18 +231,28 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
}
-/* We fill in key by putting in 40 byte array which was allocated by caller */
-int cifs_calculate_session_key(struct session_key *key, const char *rn,
- const char *password)
+/* first calculate 24 bytes ntlm response and then 16 byte session key */
+int setup_ntlm_response(struct cifsSesInfo *ses)
{
- char temp_key[16];
- if ((key == NULL) || (rn == NULL))
+ unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE;
+ char temp_key[CIFS_SESS_KEY_SIZE];
+
+ if (!ses)
return -EINVAL;
- E_md4hash(password, temp_key);
- mdfour(key->data.ntlm, temp_key, 16);
- memcpy(key->data.ntlm+16, rn, CIFS_SESS_KEY_SIZE);
- key->len = 40;
+ ses->auth_key.response = kmalloc(temp_len, GFP_KERNEL);
+ if (!ses->auth_key.response) {
+ cERROR(1, "NTLM can't allocate (%u bytes) memory", temp_len);
+ return -ENOMEM;
+ }
+ ses->auth_key.len = temp_len;
+
+ SMBNTencrypt(ses->password, ses->server->cryptkey,
+ ses->auth_key.response + CIFS_SESS_KEY_SIZE);
+
+ E_md4hash(ses->password, temp_key);
+ mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE);
+
return 0;
}
@@ -294,15 +326,15 @@ build_avpair_blob(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
* two times the unicode length of a server name +
* size of a timestamp (which is 8 bytes).
*/
- ses->tilen = size + 2 * (2 * dlen) + 2 * (2 * wlen) + 8;
- ses->tiblob = kzalloc(ses->tilen, GFP_KERNEL);
- if (!ses->tiblob) {
- ses->tilen = 0;
+ ses->auth_key.len = size + 2 * (2 * dlen) + 2 * (2 * wlen) + 8;
+ ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL);
+ if (!ses->auth_key.response) {
+ ses->auth_key.len = 0;
cERROR(1, "Challenge target info allocation failure");
return -ENOMEM;
}
- blobptr = ses->tiblob;
+ blobptr = ses->auth_key.response;
attrptr = (struct ntlmssp2_name *) blobptr;
attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME);
@@ -357,7 +389,7 @@ build_avpair_blob(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
* about target string i.e. for some, just user name might suffice.
*/
static int
-find_domain_name(struct cifsSesInfo *ses)
+find_domain_name(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
{
unsigned int attrsize;
unsigned int type;
@@ -366,11 +398,11 @@ find_domain_name(struct cifsSesInfo *ses)
unsigned char *blobend;
struct ntlmssp2_name *attrptr;
- if (!ses->tilen || !ses->tiblob)
+ if (!ses->auth_key.len || !ses->auth_key.response)
return 0;
- blobptr = ses->tiblob;
- blobend = ses->tiblob + ses->tilen;
+ blobptr = ses->auth_key.response;
+ blobend = blobptr + ses->auth_key.len;
while (blobptr + onesize < blobend) {
attrptr = (struct ntlmssp2_name *) blobptr;
@@ -386,16 +418,13 @@ find_domain_name(struct cifsSesInfo *ses)
if (!attrsize)
break;
if (!ses->domainName) {
- struct nls_table *default_nls;
ses->domainName =
kmalloc(attrsize + 1, GFP_KERNEL);
if (!ses->domainName)
return -ENOMEM;
- default_nls = load_nls_default();
cifs_from_ucs2(ses->domainName,
(__le16 *)blobptr, attrsize, attrsize,
- default_nls, false);
- unload_nls(default_nls);
+ nls_cp, false);
break;
}
}
@@ -405,82 +434,136 @@ find_domain_name(struct cifsSesInfo *ses)
return 0;
}
-static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
+static int calc_ntlmv2_hash(struct cifsSesInfo *ses, char *ntlmv2_hash,
const struct nls_table *nls_cp)
{
int rc = 0;
int len;
- char nt_hash[16];
- struct HMACMD5Context *pctxt;
+ char nt_hash[CIFS_NTHASH_SIZE];
wchar_t *user;
wchar_t *domain;
+ wchar_t *server;
- pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL);
-
- if (pctxt == NULL)
- return -ENOMEM;
+ if (!ses->server->secmech.sdeschmacmd5) {
+ cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
+ return -1;
+ }
/* calculate md4 hash of password */
E_md4hash(ses->password, nt_hash);
- /* convert Domainname to unicode and uppercase */
- hmac_md5_init_limK_to_64(nt_hash, 16, pctxt);
+ crypto_shash_setkey(ses->server->secmech.hmacmd5, nt_hash,
+ CIFS_NTHASH_SIZE);
+
+ rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
+ if (rc) {
+ cERROR(1, "calc_ntlmv2_hash: could not init hmacmd5\n");
+ return rc;
+ }
/* convert ses->userName to unicode and uppercase */
len = strlen(ses->userName);
user = kmalloc(2 + (len * 2), GFP_KERNEL);
- if (user == NULL)
+ if (user == NULL) {
+ cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n");
+ rc = -ENOMEM;
goto calc_exit_2;
+ }
len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
UniStrupr(user);
- hmac_md5_update((char *)user, 2*len, pctxt);
+
+ crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+ (char *)user, 2 * len);
/* convert ses->domainName to unicode and uppercase */
if (ses->domainName) {
len = strlen(ses->domainName);
domain = kmalloc(2 + (len * 2), GFP_KERNEL);
- if (domain == NULL)
+ if (domain == NULL) {
+ cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure");
+ rc = -ENOMEM;
goto calc_exit_1;
+ }
len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len,
nls_cp);
- /* the following line was removed since it didn't work well
- with lower cased domain name that passed as an option.
- Maybe converting the domain name earlier makes sense */
- /* UniStrupr(domain); */
-
- hmac_md5_update((char *)domain, 2*len, pctxt);
-
+ crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+ (char *)domain, 2 * len);
kfree(domain);
+ } else if (ses->serverName) {
+ len = strlen(ses->serverName);
+
+ server = kmalloc(2 + (len * 2), GFP_KERNEL);
+ if (server == NULL) {
+ cERROR(1, "calc_ntlmv2_hash: server mem alloc failure");
+ rc = -ENOMEM;
+ goto calc_exit_1;
+ }
+ len = cifs_strtoUCS((__le16 *)server, ses->serverName, len,
+ nls_cp);
+ crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+ (char *)server, 2 * len);
+ kfree(server);
}
+
+ rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
+ ntlmv2_hash);
+
calc_exit_1:
kfree(user);
calc_exit_2:
- /* BB FIXME what about bytes 24 through 40 of the signing key?
- compare with the NTLM example */
- hmac_md5_final(ses->ntlmv2_hash, pctxt);
+ return rc;
+}
+
+static int
+CalcNTLMv2_response(const struct cifsSesInfo *ses, char *ntlmv2_hash)
+{
+ int rc;
+ unsigned int offset = CIFS_SESS_KEY_SIZE + 8;
+
+ if (!ses->server->secmech.sdeschmacmd5) {
+ cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
+ return -1;
+ }
+
+ crypto_shash_setkey(ses->server->secmech.hmacmd5,
+ ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
+
+ rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
+ if (rc) {
+ cERROR(1, "CalcNTLMv2_response: could not init hmacmd5");
+ return rc;
+ }
+
+ if (ses->server->secType == RawNTLMSSP)
+ memcpy(ses->auth_key.response + offset,
+ ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
+ else
+ memcpy(ses->auth_key.response + offset,
+ ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
+ crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+ ses->auth_key.response + offset, ses->auth_key.len - offset);
+
+ rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
+ ses->auth_key.response + CIFS_SESS_KEY_SIZE);
- kfree(pctxt);
return rc;
}
+
int
-setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
- const struct nls_table *nls_cp)
+setup_ntlmv2_rsp(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
{
int rc;
- struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf;
- struct HMACMD5Context context;
-
- buf->blob_signature = cpu_to_le32(0x00000101);
- buf->reserved = 0;
- buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
- get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
- buf->reserved2 = 0;
+ int baselen;
+ unsigned int tilen;
+ struct ntlmv2_resp *buf;
+ char ntlmv2_hash[16];
+ unsigned char *tiblob = NULL; /* target info blob */
if (ses->server->secType == RawNTLMSSP) {
if (!ses->domainName) {
- rc = find_domain_name(ses);
+ rc = find_domain_name(ses, nls_cp);
if (rc) {
cERROR(1, "error %d finding domain name", rc);
goto setup_ntlmv2_rsp_ret;
@@ -490,51 +573,179 @@ setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
rc = build_avpair_blob(ses, nls_cp);
if (rc) {
cERROR(1, "error %d building av pair blob", rc);
- return rc;
+ goto setup_ntlmv2_rsp_ret;
}
}
- /* calculate buf->ntlmv2_hash */
- rc = calc_ntlmv2_hash(ses, nls_cp);
+ baselen = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp);
+ tilen = ses->auth_key.len;
+ tiblob = ses->auth_key.response;
+
+ ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL);
+ if (!ses->auth_key.response) {
+ rc = ENOMEM;
+ ses->auth_key.len = 0;
+ cERROR(1, "%s: Can't allocate auth blob", __func__);
+ goto setup_ntlmv2_rsp_ret;
+ }
+ ses->auth_key.len += baselen;
+
+ buf = (struct ntlmv2_resp *)
+ (ses->auth_key.response + CIFS_SESS_KEY_SIZE);
+ buf->blob_signature = cpu_to_le32(0x00000101);
+ buf->reserved = 0;
+ buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
+ get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
+ buf->reserved2 = 0;
+
+ memcpy(ses->auth_key.response + baselen, tiblob, tilen);
+
+ /* calculate ntlmv2_hash */
+ rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp);
if (rc) {
cERROR(1, "could not get v2 hash rc %d", rc);
goto setup_ntlmv2_rsp_ret;
}
- CalcNTLMv2_response(ses, resp_buf);
+
+ /* calculate first part of the client response (CR1) */
+ rc = CalcNTLMv2_response(ses, ntlmv2_hash);
+ if (rc) {
+ cERROR(1, "Could not calculate CR1 rc: %d", rc);
+ goto setup_ntlmv2_rsp_ret;
+ }
/* now calculate the session key for NTLMv2 */
- hmac_md5_init_limK_to_64(ses->ntlmv2_hash, 16, &context);
- hmac_md5_update(resp_buf, 16, &context);
- hmac_md5_final(ses->auth_key.data.ntlmv2.key, &context);
+ crypto_shash_setkey(ses->server->secmech.hmacmd5,
+ ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
+
+ rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
+ if (rc) {
+ cERROR(1, "%s: Could not init hmacmd5\n", __func__);
+ goto setup_ntlmv2_rsp_ret;
+ }
- memcpy(&ses->auth_key.data.ntlmv2.resp, resp_buf,
- sizeof(struct ntlmv2_resp));
- ses->auth_key.len = 16 + sizeof(struct ntlmv2_resp);
+ crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+ ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ CIFS_HMAC_MD5_HASH_SIZE);
- return 0;
+ rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
+ ses->auth_key.response);
setup_ntlmv2_rsp_ret:
- kfree(ses->tiblob);
- ses->tiblob = NULL;
- ses->tilen = 0;
+ kfree(tiblob);
return rc;
}
-void CalcNTLMv2_response(const struct cifsSesInfo *ses,
- char *v2_session_response)
+int
+calc_seckey(struct cifsSesInfo *ses)
{
- struct HMACMD5Context context;
- /* rest of v2 struct already generated */
- memcpy(v2_session_response + 8, ses->cryptKey, 8);
- hmac_md5_init_limK_to_64(ses->ntlmv2_hash, 16, &context);
+ int rc;
+ struct crypto_blkcipher *tfm_arc4;
+ struct scatterlist sgin, sgout;
+ struct blkcipher_desc desc;
+ unsigned char sec_key[CIFS_SESS_KEY_SIZE]; /* a nonce */
+
+ get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE);
+
+ tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+ if (!tfm_arc4 || IS_ERR(tfm_arc4)) {
+ cERROR(1, "could not allocate crypto API arc4\n");
+ return PTR_ERR(tfm_arc4);
+ }
- hmac_md5_update(v2_session_response+8,
- sizeof(struct ntlmv2_resp) - 8, &context);
+ desc.tfm = tfm_arc4;
- if (ses->tilen)
- hmac_md5_update(ses->tiblob, ses->tilen, &context);
+ crypto_blkcipher_setkey(tfm_arc4, ses->auth_key.response,
+ CIFS_SESS_KEY_SIZE);
- hmac_md5_final(v2_session_response, &context);
-/* cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */
+ sg_init_one(&sgin, sec_key, CIFS_SESS_KEY_SIZE);
+ sg_init_one(&sgout, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
+
+ rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE);
+ if (rc) {
+ cERROR(1, "could not encrypt session key rc: %d\n", rc);
+ crypto_free_blkcipher(tfm_arc4);
+ return rc;
+ }
+
+ /* make secondary_key/nonce as session key */
+ memcpy(ses->auth_key.response, sec_key, CIFS_SESS_KEY_SIZE);
+ /* and make len as that of session key only */
+ ses->auth_key.len = CIFS_SESS_KEY_SIZE;
+
+ crypto_free_blkcipher(tfm_arc4);
+
+ return 0;
+}
+
+void
+cifs_crypto_shash_release(struct TCP_Server_Info *server)
+{
+ if (server->secmech.md5)
+ crypto_free_shash(server->secmech.md5);
+
+ if (server->secmech.hmacmd5)
+ crypto_free_shash(server->secmech.hmacmd5);
+
+ kfree(server->secmech.sdeschmacmd5);
+
+ kfree(server->secmech.sdescmd5);
+}
+
+int
+cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
+{
+ int rc;
+ unsigned int size;
+
+ server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
+ if (!server->secmech.hmacmd5 ||
+ IS_ERR(server->secmech.hmacmd5)) {
+ cERROR(1, "could not allocate crypto hmacmd5\n");
+ return PTR_ERR(server->secmech.hmacmd5);
+ }
+
+ server->secmech.md5 = crypto_alloc_shash("md5", 0, 0);
+ if (!server->secmech.md5 || IS_ERR(server->secmech.md5)) {
+ cERROR(1, "could not allocate crypto md5\n");
+ rc = PTR_ERR(server->secmech.md5);
+ goto crypto_allocate_md5_fail;
+ }
+
+ size = sizeof(struct shash_desc) +
+ crypto_shash_descsize(server->secmech.hmacmd5);
+ server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL);
+ if (!server->secmech.sdeschmacmd5) {
+ cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5\n");
+ rc = -ENOMEM;
+ goto crypto_allocate_hmacmd5_sdesc_fail;
+ }
+ server->secmech.sdeschmacmd5->shash.tfm = server->secmech.hmacmd5;
+ server->secmech.sdeschmacmd5->shash.flags = 0x0;
+
+
+ size = sizeof(struct shash_desc) +
+ crypto_shash_descsize(server->secmech.md5);
+ server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL);
+ if (!server->secmech.sdescmd5) {
+ cERROR(1, "cifs_crypto_shash_allocate: can't alloc md5\n");
+ rc = -ENOMEM;
+ goto crypto_allocate_md5_sdesc_fail;
+ }
+ server->secmech.sdescmd5->shash.tfm = server->secmech.md5;
+ server->secmech.sdescmd5->shash.flags = 0x0;
+
+ return 0;
+
+crypto_allocate_md5_sdesc_fail:
+ kfree(server->secmech.sdeschmacmd5);
+
+crypto_allocate_hmacmd5_sdesc_fail:
+ crypto_free_shash(server->secmech.md5);
+
+crypto_allocate_md5_fail:
+ crypto_free_shash(server->secmech.hmacmd5);
+
+ return rc;
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3437163..5e7075d 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -116,7 +116,7 @@ cifs_read_super(struct super_block *sb, void *data,
return -ENOMEM;
spin_lock_init(&cifs_sb->tlink_tree_lock);
- INIT_RADIX_TREE(&cifs_sb->tlink_tree, GFP_KERNEL);
+ cifs_sb->tlink_tree = RB_ROOT;
rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY);
if (rc) {
@@ -283,10 +283,13 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
-static int cifs_permission(struct inode *inode, int mask)
+static int cifs_permission(struct inode *inode, int mask, unsigned int flags)
{
struct cifs_sb_info *cifs_sb;
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
cifs_sb = CIFS_SB(inode->i_sb);
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) {
@@ -298,7 +301,7 @@ static int cifs_permission(struct inode *inode, int mask)
on the client (above and beyond ACL on servers) for
servers which do not support setting and viewing mode bits,
so allowing client to check permissions is useful */
- return generic_permission(inode, mask, NULL);
+ return generic_permission(inode, mask, flags, NULL);
}
static struct kmem_cache *cifs_inode_cachep;
@@ -318,16 +321,16 @@ cifs_alloc_inode(struct super_block *sb)
return NULL;
cifs_inode->cifsAttrs = 0x20; /* default */
cifs_inode->time = 0;
- cifs_inode->write_behind_rc = 0;
/* Until the file is open and we have gotten oplock
info back from the server, can not assume caching of
file data or metadata */
- cifs_inode->clientCanCacheRead = false;
- cifs_inode->clientCanCacheAll = false;
+ cifs_set_oplock_level(cifs_inode, 0);
cifs_inode->delete_pending = false;
cifs_inode->invalid_mapping = false;
cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
cifs_inode->server_eof = 0;
+ cifs_inode->uniqueid = 0;
+ cifs_inode->createtime = 0;
/* Can not set i_flags here - they get immediately overwritten
to zero by the VFS */
@@ -336,10 +339,17 @@ cifs_alloc_inode(struct super_block *sb)
return &cifs_inode->vfs_inode;
}
+static void cifs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
+}
+
static void
cifs_destroy_inode(struct inode *inode)
{
- kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
+ call_rcu(&inode->i_rcu, cifs_i_callback);
}
static void
@@ -353,18 +363,19 @@ cifs_evict_inode(struct inode *inode)
static void
cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
{
+ struct sockaddr_in *sa = (struct sockaddr_in *) &server->dstaddr;
+ struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) &server->dstaddr;
+
seq_printf(s, ",addr=");
- switch (server->addr.sockAddr.sin_family) {
+ switch (server->dstaddr.ss_family) {
case AF_INET:
- seq_printf(s, "%pI4", &server->addr.sockAddr.sin_addr.s_addr);
+ seq_printf(s, "%pI4", &sa->sin_addr.s_addr);
break;
case AF_INET6:
- seq_printf(s, "%pI6",
- &server->addr.sockAddr6.sin6_addr.s6_addr);
- if (server->addr.sockAddr6.sin6_scope_id)
- seq_printf(s, "%%%u",
- server->addr.sockAddr6.sin6_scope_id);
+ seq_printf(s, "%pI6", &sa6->sin6_addr.s6_addr);
+ if (sa6->sin6_scope_id)
+ seq_printf(s, "%%%u", sa6->sin6_scope_id);
break;
default:
seq_printf(s, "(unknown)");
@@ -460,9 +471,13 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
seq_printf(s, ",acl");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS)
seq_printf(s, ",mfsymlinks");
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE)
+ seq_printf(s, ",fsc");
seq_printf(s, ",rsize=%d", cifs_sb->rsize);
seq_printf(s, ",wsize=%d", cifs_sb->wsize);
+ /* convert actimeo and display it in seconds */
+ seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ);
return 0;
}
@@ -545,9 +560,9 @@ static const struct super_operations cifs_super_ops = {
#endif
};
-static int
-cifs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *
+cifs_do_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
int rc;
struct super_block *sb;
@@ -557,18 +572,17 @@ cifs_get_sb(struct file_system_type *fs_type,
cFYI(1, "Devname: %s flags: %d ", dev_name, flags);
if (IS_ERR(sb))
- return PTR_ERR(sb);
+ return ERR_CAST(sb);
sb->s_flags = flags;
rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0);
if (rc) {
deactivate_locked_super(sb);
- return rc;
+ return ERR_PTR(rc);
}
sb->s_flags |= MS_ACTIVE;
- simple_set_mnt(mnt, sb);
- return 0;
+ return dget(sb->s_root);
}
static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
@@ -634,7 +648,7 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
struct file_system_type cifs_fs_type = {
.owner = THIS_MODULE,
.name = "cifs",
- .get_sb = cifs_get_sb,
+ .mount = cifs_do_mount,
.kill_sb = kill_anon_super,
/* .fs_flags */
};
@@ -936,7 +950,6 @@ init_cifs(void)
GlobalCurrentXid = 0;
GlobalTotalActiveXid = 0;
GlobalMaxActiveXid = 0;
- memset(Local_System_Name, 0, 15);
spin_lock_init(&cifs_tcp_ses_lock);
spin_lock_init(&cifs_file_list_lock);
spin_lock_init(&GlobalMid_Lock);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index f35795a..897b2b2 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -112,5 +112,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* EXPERIMENTAL */
-#define CIFS_VERSION "1.67"
+#define CIFS_VERSION "1.68"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 3365e77f..606ca8b 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -25,6 +25,9 @@
#include <linux/workqueue.h>
#include "cifs_fs_sb.h"
#include "cifsacl.h"
+#include <crypto/internal/hash.h>
+#include <linux/scatterlist.h>
+
/*
* The sizes of various internal tables and strings
*/
@@ -42,6 +45,16 @@
#define CIFS_MIN_RCV_POOL 4
/*
+ * default attribute cache timeout (jiffies)
+ */
+#define CIFS_DEF_ACTIMEO (1 * HZ)
+
+/*
+ * max attribute cache timeout (jiffies) - 2^30
+ */
+#define CIFS_MAX_ACTIMEO (1 << 30)
+
+/*
* MAX_REQ is the maximum number of requests that WE will send
* on one socket concurrently. It also matches the most common
* value of max multiplex returned by servers. We may
@@ -74,7 +87,7 @@
* CIFS vfs client Status information (based on what we know.)
*/
- /* associated with each tcp and smb session */
+/* associated with each tcp and smb session */
enum statusEnum {
CifsNew = 0,
CifsGood,
@@ -99,14 +112,29 @@ enum protocolEnum {
struct session_key {
unsigned int len;
- union {
- char ntlm[CIFS_SESS_KEY_SIZE + 16];
- char krb5[CIFS_SESS_KEY_SIZE + 16]; /* BB: length correct? */
- struct {
- char key[16];
- struct ntlmv2_resp resp;
- } ntlmv2;
- } data;
+ char *response;
+};
+
+/* crypto security descriptor definition */
+struct sdesc {
+ struct shash_desc shash;
+ char ctx[];
+};
+
+/* crypto hashing related structure/fields, not specific to a sec mech */
+struct cifs_secmech {
+ struct crypto_shash *hmacmd5; /* hmac-md5 hash function */
+ struct crypto_shash *md5; /* md5 hash function */
+ struct sdesc *sdeschmacmd5; /* ctxt to generate ntlmv2 hash, CR1 */
+ struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */
+};
+
+/* per smb session structure/fields */
+struct ntlmssp_auth {
+ __u32 client_flags; /* sent by client in type 1 ntlmsssp exchange */
+ __u32 server_flags; /* sent by server in type 2 ntlmssp exchange */
+ unsigned char ciphertext[CIFS_CPHTXT_SIZE]; /* sent to server */
+ char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlmssp */
};
struct cifs_cred {
@@ -135,10 +163,7 @@ struct TCP_Server_Info {
char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
char *hostname; /* hostname portion of UNC string */
struct socket *ssocket;
- union {
- struct sockaddr_in sockAddr;
- struct sockaddr_in6 sockAddr6;
- } addr;
+ struct sockaddr_storage dstaddr;
struct sockaddr_storage srcaddr; /* locally bind to this IP */
wait_queue_head_t response_q;
wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/
@@ -179,12 +204,14 @@ struct TCP_Server_Info {
int capabilities; /* allow selective disabling of caps by smb sess */
int timeAdj; /* Adjust for difference in server time zone in sec */
__u16 CurrentMid; /* multiplex id - rotating counter */
+ char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */
/* 16th byte of RFC1001 workstation name is always null */
char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
- __u32 sequence_number; /* needed for CIFS PDU signature */
+ __u32 sequence_number; /* for signing, protected by srv_mutex */
struct session_key session_key;
unsigned long lstrp; /* when we got last response from this server */
u16 dialect; /* dialect index that server chose */
+ struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */
/* extended security flavors that server supports */
bool sec_kerberos; /* supports plain Kerberos */
bool sec_mskerberos; /* supports legacy MS Kerberos */
@@ -222,11 +249,8 @@ struct cifsSesInfo {
char userName[MAX_USERNAME_SIZE + 1];
char *domainName;
char *password;
- char cryptKey[CIFS_CRYPTO_KEY_SIZE];
struct session_key auth_key;
- char ntlmv2_hash[16];
- unsigned int tilen; /* length of the target info blob */
- unsigned char *tiblob; /* target info blob in challenge response */
+ struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
bool need_reconnect:1; /* connection reset, uid now invalid */
};
/* no more than one of the following three session flags may be set */
@@ -319,7 +343,8 @@ struct cifsTconInfo {
* "get" on the container.
*/
struct tcon_link {
- unsigned long tl_index;
+ struct rb_node tl_rbnode;
+ uid_t tl_uid;
unsigned long tl_flags;
#define TCON_LINK_MASTER 0
#define TCON_LINK_PENDING 1
@@ -395,16 +420,19 @@ struct cifsFileInfo {
struct list_head llist; /* list of byte range locks we have. */
bool invalidHandle:1; /* file closed via session abend */
bool oplock_break_cancelled:1;
- atomic_t count; /* reference count */
+ int count; /* refcount protected by cifs_file_list_lock */
struct mutex fh_mutex; /* prevents reopen race after dead ses*/
struct cifs_search_info srch_inf;
struct work_struct oplock_break; /* work for oplock breaks */
};
-/* Take a reference on the file private data */
+/*
+ * Take a reference on the file private data. Must be called with
+ * cifs_file_list_lock held.
+ */
static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
- atomic_inc(&cifs_file->count);
+ ++cifs_file->count;
}
void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
@@ -417,7 +445,6 @@ struct cifsInodeInfo {
struct list_head lockList;
/* BB add in lists for dirty pages i.e. write caching info for oplock */
struct list_head openFileList;
- int write_behind_rc;
__u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
unsigned long time; /* jiffies of last update/check of inode */
bool clientCanCacheRead:1; /* read oplock */
@@ -426,6 +453,7 @@ struct cifsInodeInfo {
bool invalid_mapping:1; /* pagecache is invalid */
u64 server_eof; /* current file size on server */
u64 uniqueid; /* server inode number */
+ u64 createtime; /* creation time on server */
#ifdef CONFIG_CIFS_FSCACHE
struct fscache_cookie *fscache;
#endif
@@ -546,6 +574,7 @@ struct cifs_fattr {
u64 cf_uniqueid;
u64 cf_eof;
u64 cf_bytes;
+ u64 cf_createtime;
uid_t cf_uid;
gid_t cf_gid;
umode_t cf_mode;
@@ -668,7 +697,7 @@ require use of the stronger protocol */
* GlobalMid_Lock protects:
* list operations on pending_mid_q and oplockQ
* updates to XID counters, multiplex id and SMB sequence numbers
- * GlobalSMBSesLock protects:
+ * cifs_file_list_lock protects:
* list operations on tcp and SMB session lists and tCon lists
* f_owner.lock protects certain per file struct operations
* mapping->page_lock protects certain per page operations
@@ -726,8 +755,6 @@ GLOBAL_EXTERN unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */
GLOBAL_EXTERN unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */
GLOBAL_EXTERN spinlock_t GlobalMid_Lock; /* protects above & list operations */
/* on midQ entries */
-GLOBAL_EXTERN char Local_System_Name[15];
-
/*
* Global counters, updated atomically
*/
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b0f4b56..de36b09 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -131,9 +131,20 @@
#define CIFS_CRYPTO_KEY_SIZE (8)
/*
+ * Size of the ntlm client response
+ */
+#define CIFS_AUTH_RESP_SIZE (24)
+
+/*
* Size of the session key (crypto key encrypted with the password
*/
-#define CIFS_SESS_KEY_SIZE (24)
+#define CIFS_SESS_KEY_SIZE (16)
+
+#define CIFS_CLIENT_CHALLENGE_SIZE (8)
+#define CIFS_SERVER_CHALLENGE_SIZE (8)
+#define CIFS_HMAC_MD5_HASH_SIZE (16)
+#define CIFS_CPHTXT_SIZE (16)
+#define CIFS_NTHASH_SIZE (16)
/*
* Maximum user name length
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index e593c40..e6d1481 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -54,7 +54,8 @@ do { \
__func__, curr_xid, (int)rc); \
} while (0)
extern char *build_path_from_dentry(struct dentry *);
-extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb);
+extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
+ struct cifsTconInfo *tcon);
extern char *build_wildcard_path_from_dentry(struct dentry *direntry);
extern char *cifs_compose_mount_options(const char *sb_mountdata,
const char *fullpath, const struct dfs_info3_param *ref,
@@ -79,9 +80,7 @@ extern bool is_valid_oplock_break(struct smb_hdr *smb,
struct TCP_Server_Info *);
extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
-#ifdef CONFIG_CIFS_EXPERIMENTAL
extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
-#endif
extern unsigned int smbCalcSize(struct smb_hdr *ptr);
extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
extern int decode_negTokenInit(unsigned char *security_blob, int length,
@@ -104,6 +103,7 @@ extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
extern u64 cifs_UnixTimeToNT(struct timespec);
extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
int offset);
+extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
extern struct cifsFileInfo *cifs_new_fileinfo(__u16 fileHandle,
struct file *file, struct tcon_link *tlink,
@@ -129,10 +129,12 @@ extern int cifs_get_file_info_unix(struct file *filp);
extern int cifs_get_inode_info_unix(struct inode **pinode,
const unsigned char *search_path,
struct super_block *sb, int xid);
-extern void cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
+extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
struct cifs_fattr *fattr, struct inode *inode,
const char *path, const __u16 *pfid);
-extern int mode_to_acl(struct inode *inode, const char *path, __u64);
+extern int mode_to_cifs_acl(struct inode *inode, const char *path, __u64);
+extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *,
+ const char *, u32 *);
extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
const char *);
@@ -362,13 +364,15 @@ extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *);
extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
__u32 *);
extern int cifs_verify_signature(struct smb_hdr *,
- const struct session_key *session_key,
+ struct TCP_Server_Info *server,
__u32 expected_sequence_number);
-extern int cifs_calculate_session_key(struct session_key *key, const char *rn,
- const char *pass);
-extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *);
-extern int setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
- const struct nls_table *);
+extern void SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *);
+extern int setup_ntlm_response(struct cifsSesInfo *);
+extern int setup_ntlmv2_rsp(struct cifsSesInfo *, const struct nls_table *);
+extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
+extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
+extern int calc_seckey(struct cifsSesInfo *);
+
#ifdef CONFIG_CIFS_WEAK_PW_HASH
extern void calc_lanman_hash(const char *password, const char *cryptkey,
bool encrypt, char *lnm_session_key);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index e98f1f3..2f6795e 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -401,15 +401,12 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_KRB5) {
cFYI(1, "Kerberos only mechanism, enable extended security");
pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
- }
-#ifdef CONFIG_CIFS_EXPERIMENTAL
- else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP)
+ } else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP)
pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_NTLMSSP) {
cFYI(1, "NTLMSSP only mechanism, enable extended security");
pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
}
-#endif
count = 0;
for (i = 0; i < CIFS_NUM_PROT; i++) {
@@ -503,7 +500,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
if (rsp->EncryptionKeyLength ==
cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) {
- memcpy(ses->cryptKey, rsp->EncryptionKey,
+ memcpy(ses->server->cryptkey, rsp->EncryptionKey,
CIFS_CRYPTO_KEY_SIZE);
} else if (server->secMode & SECMODE_PW_ENCRYPT) {
rc = -EIO; /* need cryptkey unless plain text */
@@ -574,7 +571,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone);
server->timeAdj *= 60;
if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) {
- memcpy(ses->cryptKey, pSMBr->u.EncryptionKey,
+ memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey,
CIFS_CRYPTO_KEY_SIZE);
} else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC)
&& (pSMBr->EncryptionKeyLength == 0)) {
@@ -2478,95 +2475,6 @@ querySymLinkRetry:
}
#ifdef CONFIG_CIFS_EXPERIMENTAL
-/* Initialize NT TRANSACT SMB into small smb request buffer.
- This assumes that all NT TRANSACTS that we init here have
- total parm and data under about 400 bytes (to fit in small cifs
- buffer size), which is the case so far, it easily fits. NB:
- Setup words themselves and ByteCount
- MaxSetupCount (size of returned setup area) and
- MaxParameterCount (returned parms size) must be set by caller */
-static int
-smb_init_nttransact(const __u16 sub_command, const int setup_count,
- const int parm_len, struct cifsTconInfo *tcon,
- void **ret_buf)
-{
- int rc;
- __u32 temp_offset;
- struct smb_com_ntransact_req *pSMB;
-
- rc = small_smb_init(SMB_COM_NT_TRANSACT, 19 + setup_count, tcon,
- (void **)&pSMB);
- if (rc)
- return rc;
- *ret_buf = (void *)pSMB;
- pSMB->Reserved = 0;
- pSMB->TotalParameterCount = cpu_to_le32(parm_len);
- pSMB->TotalDataCount = 0;
- pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
- MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
- pSMB->ParameterCount = pSMB->TotalParameterCount;
- pSMB->DataCount = pSMB->TotalDataCount;
- temp_offset = offsetof(struct smb_com_ntransact_req, Parms) +
- (setup_count * 2) - 4 /* for rfc1001 length itself */;
- pSMB->ParameterOffset = cpu_to_le32(temp_offset);
- pSMB->DataOffset = cpu_to_le32(temp_offset + parm_len);
- pSMB->SetupCount = setup_count; /* no need to le convert byte fields */
- pSMB->SubCommand = cpu_to_le16(sub_command);
- return 0;
-}
-
-static int
-validate_ntransact(char *buf, char **ppparm, char **ppdata,
- __u32 *pparmlen, __u32 *pdatalen)
-{
- char *end_of_smb;
- __u32 data_count, data_offset, parm_count, parm_offset;
- struct smb_com_ntransact_rsp *pSMBr;
-
- *pdatalen = 0;
- *pparmlen = 0;
-
- if (buf == NULL)
- return -EINVAL;
-
- pSMBr = (struct smb_com_ntransact_rsp *)buf;
-
- /* ByteCount was converted from little endian in SendReceive */
- end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount +
- (char *)&pSMBr->ByteCount;
-
- data_offset = le32_to_cpu(pSMBr->DataOffset);
- data_count = le32_to_cpu(pSMBr->DataCount);
- parm_offset = le32_to_cpu(pSMBr->ParameterOffset);
- parm_count = le32_to_cpu(pSMBr->ParameterCount);
-
- *ppparm = (char *)&pSMBr->hdr.Protocol + parm_offset;
- *ppdata = (char *)&pSMBr->hdr.Protocol + data_offset;
-
- /* should we also check that parm and data areas do not overlap? */
- if (*ppparm > end_of_smb) {
- cFYI(1, "parms start after end of smb");
- return -EINVAL;
- } else if (parm_count + *ppparm > end_of_smb) {
- cFYI(1, "parm end after end of smb");
- return -EINVAL;
- } else if (*ppdata > end_of_smb) {
- cFYI(1, "data starts after end of smb");
- return -EINVAL;
- } else if (data_count + *ppdata > end_of_smb) {
- cFYI(1, "data %p + count %d (%p) past smb end %p start %p",
- *ppdata, data_count, (data_count + *ppdata),
- end_of_smb, pSMBr);
- return -EINVAL;
- } else if (parm_count + data_count > pSMBr->ByteCount) {
- cFYI(1, "parm count and data count larger than SMB");
- return -EINVAL;
- }
- *pdatalen = data_count;
- *pparmlen = parm_count;
- return 0;
-}
-
int
CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
const unsigned char *searchName,
@@ -3056,7 +2964,97 @@ GetExtAttrOut:
#endif /* CONFIG_POSIX */
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CONFIG_CIFS_ACL
+/*
+ * Initialize NT TRANSACT SMB into small smb request buffer. This assumes that
+ * all NT TRANSACTS that we init here have total parm and data under about 400
+ * bytes (to fit in small cifs buffer size), which is the case so far, it
+ * easily fits. NB: Setup words themselves and ByteCount MaxSetupCount (size of
+ * returned setup area) and MaxParameterCount (returned parms size) must be set
+ * by caller
+ */
+static int
+smb_init_nttransact(const __u16 sub_command, const int setup_count,
+ const int parm_len, struct cifsTconInfo *tcon,
+ void **ret_buf)
+{
+ int rc;
+ __u32 temp_offset;
+ struct smb_com_ntransact_req *pSMB;
+
+ rc = small_smb_init(SMB_COM_NT_TRANSACT, 19 + setup_count, tcon,
+ (void **)&pSMB);
+ if (rc)
+ return rc;
+ *ret_buf = (void *)pSMB;
+ pSMB->Reserved = 0;
+ pSMB->TotalParameterCount = cpu_to_le32(parm_len);
+ pSMB->TotalDataCount = 0;
+ pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
+ MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
+ pSMB->ParameterCount = pSMB->TotalParameterCount;
+ pSMB->DataCount = pSMB->TotalDataCount;
+ temp_offset = offsetof(struct smb_com_ntransact_req, Parms) +
+ (setup_count * 2) - 4 /* for rfc1001 length itself */;
+ pSMB->ParameterOffset = cpu_to_le32(temp_offset);
+ pSMB->DataOffset = cpu_to_le32(temp_offset + parm_len);
+ pSMB->SetupCount = setup_count; /* no need to le convert byte fields */
+ pSMB->SubCommand = cpu_to_le16(sub_command);
+ return 0;
+}
+
+static int
+validate_ntransact(char *buf, char **ppparm, char **ppdata,
+ __u32 *pparmlen, __u32 *pdatalen)
+{
+ char *end_of_smb;
+ __u32 data_count, data_offset, parm_count, parm_offset;
+ struct smb_com_ntransact_rsp *pSMBr;
+
+ *pdatalen = 0;
+ *pparmlen = 0;
+
+ if (buf == NULL)
+ return -EINVAL;
+
+ pSMBr = (struct smb_com_ntransact_rsp *)buf;
+
+ /* ByteCount was converted from little endian in SendReceive */
+ end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount +
+ (char *)&pSMBr->ByteCount;
+
+ data_offset = le32_to_cpu(pSMBr->DataOffset);
+ data_count = le32_to_cpu(pSMBr->DataCount);
+ parm_offset = le32_to_cpu(pSMBr->ParameterOffset);
+ parm_count = le32_to_cpu(pSMBr->ParameterCount);
+
+ *ppparm = (char *)&pSMBr->hdr.Protocol + parm_offset;
+ *ppdata = (char *)&pSMBr->hdr.Protocol + data_offset;
+
+ /* should we also check that parm and data areas do not overlap? */
+ if (*ppparm > end_of_smb) {
+ cFYI(1, "parms start after end of smb");
+ return -EINVAL;
+ } else if (parm_count + *ppparm > end_of_smb) {
+ cFYI(1, "parm end after end of smb");
+ return -EINVAL;
+ } else if (*ppdata > end_of_smb) {
+ cFYI(1, "data starts after end of smb");
+ return -EINVAL;
+ } else if (data_count + *ppdata > end_of_smb) {
+ cFYI(1, "data %p + count %d (%p) past smb end %p start %p",
+ *ppdata, data_count, (data_count + *ppdata),
+ end_of_smb, pSMBr);
+ return -EINVAL;
+ } else if (parm_count + data_count > pSMBr->ByteCount) {
+ cFYI(1, "parm count and data count larger than SMB");
+ return -EINVAL;
+ }
+ *pdatalen = data_count;
+ *pparmlen = parm_count;
+ return 0;
+}
+
/* Get Security Descriptor (by handle) from remote server for a file or dir */
int
CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
@@ -3214,7 +3212,7 @@ setCifsAclRetry:
return (rc);
}
-#endif /* CONFIG_CIFS_EXPERIMENTAL */
+#endif /* CONFIG_CIFS_ACL */
/* Legacy Query Path Information call for lookup to old servers such
as Win9x/WinME */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 7e73176..a65d311 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -64,8 +64,8 @@ struct smb_vol {
char *UNC;
char *UNCip;
char *iocharset; /* local code page for mapping to and from Unicode */
- char source_rfc1001_name[16]; /* netbios name of client */
- char target_rfc1001_name[16]; /* netbios name of server for Win9x/ME */
+ char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */
+ char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */
uid_t cred_uid;
uid_t linux_uid;
gid_t linux_gid;
@@ -105,6 +105,7 @@ struct smb_vol {
unsigned int wsize;
bool sockopt_tcp_nodelay:1;
unsigned short int port;
+ unsigned long actimeo; /* attribute cache timeout (jiffies) */
char *prepath;
struct sockaddr_storage srcaddr; /* allow binding to a local IP */
struct nls_table *local_nls;
@@ -114,8 +115,9 @@ struct smb_vol {
#define TLINK_ERROR_EXPIRE (1 * HZ)
#define TLINK_IDLE_EXPIRE (600 * HZ)
-static int ipv4_connect(struct TCP_Server_Info *server);
-static int ipv6_connect(struct TCP_Server_Info *server);
+static int ip_connect(struct TCP_Server_Info *server);
+static int generic_ip_connect(struct TCP_Server_Info *server);
+static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink);
static void cifs_prune_tlinks(struct work_struct *work);
/*
@@ -175,6 +177,9 @@ cifs_reconnect(struct TCP_Server_Info *server)
}
server->sequence_number = 0;
server->session_estab = false;
+ kfree(server->session_key.response);
+ server->session_key.response = NULL;
+ server->session_key.len = 0;
spin_lock(&GlobalMid_Lock);
list_for_each(tmp, &server->pending_mid_q) {
@@ -195,10 +200,9 @@ cifs_reconnect(struct TCP_Server_Info *server)
while ((server->tcpStatus != CifsExiting) &&
(server->tcpStatus != CifsGood)) {
try_to_freeze();
- if (server->addr.sockAddr6.sin6_family == AF_INET6)
- rc = ipv6_connect(server);
- else
- rc = ipv4_connect(server);
+
+ /* we should try only the port we connected to before */
+ rc = generic_ip_connect(server);
if (rc) {
cFYI(1, "reconnect error %d", rc);
msleep(3000);
@@ -472,7 +476,7 @@ incomplete_rcv:
* initialize frame)
*/
cifs_set_port((struct sockaddr *)
- &server->addr.sockAddr, CIFS_PORT);
+ &server->dstaddr, CIFS_PORT);
cifs_reconnect(server);
csocket = server->ssocket;
wake_up(&server->response_q);
@@ -802,24 +806,21 @@ cifs_parse_mount_options(char *options, const char *devname,
short int override_gid = -1;
bool uid_specified = false;
bool gid_specified = false;
+ char *nodename = utsname()->nodename;
separator[0] = ',';
separator[1] = 0;
- if (Local_System_Name[0] != 0)
- memcpy(vol->source_rfc1001_name, Local_System_Name, 15);
- else {
- char *nodename = utsname()->nodename;
- int n = strnlen(nodename, 15);
- memset(vol->source_rfc1001_name, 0x20, 15);
- for (i = 0; i < n; i++) {
- /* does not have to be perfect mapping since field is
- informational, only used for servers that do not support
- port 445 and it can be overridden at mount time */
- vol->source_rfc1001_name[i] = toupper(nodename[i]);
- }
- }
- vol->source_rfc1001_name[15] = 0;
+ /*
+ * does not have to be perfect mapping since field is
+ * informational, only used for servers that do not support
+ * port 445 and it can be overridden at mount time
+ */
+ memset(vol->source_rfc1001_name, 0x20, RFC1001_NAME_LEN);
+ for (i = 0; i < strnlen(nodename, RFC1001_NAME_LEN); i++)
+ vol->source_rfc1001_name[i] = toupper(nodename[i]);
+
+ vol->source_rfc1001_name[RFC1001_NAME_LEN] = 0;
/* null target name indicates to use *SMBSERVR default called name
if we end up sending RFC1001 session initialize */
vol->target_rfc1001_name[0] = 0;
@@ -836,6 +837,8 @@ cifs_parse_mount_options(char *options, const char *devname,
/* default to using server inode numbers where available */
vol->server_ino = 1;
+ vol->actimeo = CIFS_DEF_ACTIMEO;
+
if (!options)
return 1;
@@ -981,13 +984,11 @@ cifs_parse_mount_options(char *options, const char *devname,
return 1;
} else if (strnicmp(value, "krb5", 4) == 0) {
vol->secFlg |= CIFSSEC_MAY_KRB5;
-#ifdef CONFIG_CIFS_EXPERIMENTAL
} else if (strnicmp(value, "ntlmsspi", 8) == 0) {
vol->secFlg |= CIFSSEC_MAY_NTLMSSP |
CIFSSEC_MUST_SIGN;
} else if (strnicmp(value, "ntlmssp", 7) == 0) {
vol->secFlg |= CIFSSEC_MAY_NTLMSSP;
-#endif
} else if (strnicmp(value, "ntlmv2i", 7) == 0) {
vol->secFlg |= CIFSSEC_MAY_NTLMV2 |
CIFSSEC_MUST_SIGN;
@@ -1064,7 +1065,7 @@ cifs_parse_mount_options(char *options, const char *devname,
}
i = cifs_convert_address((struct sockaddr *)&vol->srcaddr,
value, strlen(value));
- if (i < 0) {
+ if (i == 0) {
printk(KERN_WARNING "CIFS: Could not parse"
" srcaddr: %s\n",
value);
@@ -1164,22 +1165,22 @@ cifs_parse_mount_options(char *options, const char *devname,
if (!value || !*value || (*value == ' ')) {
cFYI(1, "invalid (empty) netbiosname");
} else {
- memset(vol->source_rfc1001_name, 0x20, 15);
- for (i = 0; i < 15; i++) {
- /* BB are there cases in which a comma can be
- valid in this workstation netbios name (and need
- special handling)? */
-
- /* We do not uppercase netbiosname for user */
+ memset(vol->source_rfc1001_name, 0x20,
+ RFC1001_NAME_LEN);
+ /*
+ * FIXME: are there cases in which a comma can
+ * be valid in workstation netbios name (and
+ * need special handling)?
+ */
+ for (i = 0; i < RFC1001_NAME_LEN; i++) {
+ /* don't ucase netbiosname for user */
if (value[i] == 0)
break;
- else
- vol->source_rfc1001_name[i] =
- value[i];
+ vol->source_rfc1001_name[i] = value[i];
}
/* The string has 16th byte zero still from
set at top of the function */
- if ((i == 15) && (value[i] != 0))
+ if (i == RFC1001_NAME_LEN && value[i] != 0)
printk(KERN_WARNING "CIFS: netbiosname"
" longer than 15 truncated.\n");
}
@@ -1189,7 +1190,8 @@ cifs_parse_mount_options(char *options, const char *devname,
cFYI(1, "empty server netbiosname specified");
} else {
/* last byte, type, is 0x20 for servr type */
- memset(vol->target_rfc1001_name, 0x20, 16);
+ memset(vol->target_rfc1001_name, 0x20,
+ RFC1001_NAME_LEN_WITH_NULL);
for (i = 0; i < 15; i++) {
/* BB are there cases in which a comma can be
@@ -1206,10 +1208,20 @@ cifs_parse_mount_options(char *options, const char *devname,
}
/* The string has 16th byte zero still from
set at top of the function */
- if ((i == 15) && (value[i] != 0))
+ if (i == RFC1001_NAME_LEN && value[i] != 0)
printk(KERN_WARNING "CIFS: server net"
"biosname longer than 15 truncated.\n");
}
+ } else if (strnicmp(data, "actimeo", 7) == 0) {
+ if (value && *value) {
+ vol->actimeo = HZ * simple_strtoul(value,
+ &value, 0);
+ if (vol->actimeo > CIFS_MAX_ACTIMEO) {
+ cERROR(1, "CIFS: attribute cache"
+ "timeout too large");
+ return 1;
+ }
+ }
} else if (strnicmp(data, "credentials", 4) == 0) {
/* ignore */
} else if (strnicmp(data, "version", 3) == 0) {
@@ -1327,10 +1339,8 @@ cifs_parse_mount_options(char *options, const char *devname,
vol->no_psx_acl = 0;
} else if (strnicmp(data, "noacl", 5) == 0) {
vol->no_psx_acl = 1;
-#ifdef CONFIG_CIFS_EXPERIMENTAL
} else if (strnicmp(data, "locallease", 6) == 0) {
vol->local_lease = 1;
-#endif
} else if (strnicmp(data, "sign", 4) == 0) {
vol->secFlg |= CIFSSEC_MUST_SIGN;
} else if (strnicmp(data, "seal", 4) == 0) {
@@ -1348,6 +1358,11 @@ cifs_parse_mount_options(char *options, const char *devname,
"supported. Instead set "
"/proc/fs/cifs/LookupCacheEnabled to 0\n");
} else if (strnicmp(data, "fsc", 3) == 0) {
+#ifndef CONFIG_CIFS_FSCACHE
+ cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE"
+ "kernel config option set");
+ return 1;
+#endif
vol->fsc = true;
} else if (strnicmp(data, "mfsymlinks", 10) == 0) {
vol->mfsymlinks = true;
@@ -1435,35 +1450,71 @@ srcip_matches(struct sockaddr *srcaddr, struct sockaddr *rhs)
}
}
+/*
+ * If no port is specified in addr structure, we try to match with 445 port
+ * and if it fails - with 139 ports. It should be called only if address
+ * families of server and addr are equal.
+ */
+static bool
+match_port(struct TCP_Server_Info *server, struct sockaddr *addr)
+{
+ unsigned short int port, *sport;
+
+ switch (addr->sa_family) {
+ case AF_INET:
+ sport = &((struct sockaddr_in *) &server->dstaddr)->sin_port;
+ port = ((struct sockaddr_in *) addr)->sin_port;
+ break;
+ case AF_INET6:
+ sport = &((struct sockaddr_in6 *) &server->dstaddr)->sin6_port;
+ port = ((struct sockaddr_in6 *) addr)->sin6_port;
+ break;
+ default:
+ WARN_ON(1);
+ return false;
+ }
+
+ if (!port) {
+ port = htons(CIFS_PORT);
+ if (port == *sport)
+ return true;
+
+ port = htons(RFC1001_PORT);
+ }
+
+ return port == *sport;
+}
static bool
match_address(struct TCP_Server_Info *server, struct sockaddr *addr,
struct sockaddr *srcaddr)
{
- struct sockaddr_in *addr4 = (struct sockaddr_in *)addr;
- struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr;
-
switch (addr->sa_family) {
- case AF_INET:
- if (addr4->sin_addr.s_addr !=
- server->addr.sockAddr.sin_addr.s_addr)
- return false;
- if (addr4->sin_port &&
- addr4->sin_port != server->addr.sockAddr.sin_port)
+ case AF_INET: {
+ struct sockaddr_in *addr4 = (struct sockaddr_in *)addr;
+ struct sockaddr_in *srv_addr4 =
+ (struct sockaddr_in *)&server->dstaddr;
+
+ if (addr4->sin_addr.s_addr != srv_addr4->sin_addr.s_addr)
return false;
break;
- case AF_INET6:
+ }
+ case AF_INET6: {
+ struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr;
+ struct sockaddr_in6 *srv_addr6 =
+ (struct sockaddr_in6 *)&server->dstaddr;
+
if (!ipv6_addr_equal(&addr6->sin6_addr,
- &server->addr.sockAddr6.sin6_addr))
+ &srv_addr6->sin6_addr))
return false;
- if (addr6->sin6_scope_id !=
- server->addr.sockAddr6.sin6_scope_id)
- return false;
- if (addr6->sin6_port &&
- addr6->sin6_port != server->addr.sockAddr6.sin6_port)
+ if (addr6->sin6_scope_id != srv_addr6->sin6_scope_id)
return false;
break;
}
+ default:
+ WARN_ON(1);
+ return false; /* don't expect to be here */
+ }
if (!srcip_matches(srcaddr, (struct sockaddr *)&server->srcaddr))
return false;
@@ -1530,6 +1581,9 @@ cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol)
(struct sockaddr *)&vol->srcaddr))
continue;
+ if (!match_port(server, addr))
+ continue;
+
if (!match_security(server, vol))
continue;
@@ -1560,8 +1614,13 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
server->tcpStatus = CifsExiting;
spin_unlock(&GlobalMid_Lock);
+ cifs_crypto_shash_release(server);
cifs_fscache_release_client_cookie(server);
+ kfree(server->session_key.response);
+ server->session_key.response = NULL;
+ server->session_key.len = 0;
+
task = xchg(&server->tsk, NULL);
if (task)
force_sig(SIGKILL, task);
@@ -1614,10 +1673,16 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
goto out_err;
}
+ rc = cifs_crypto_shash_allocate(tcp_ses);
+ if (rc) {
+ cERROR(1, "could not setup hash structures rc %d", rc);
+ goto out_err;
+ }
+
tcp_ses->hostname = extract_hostname(volume_info->UNC);
if (IS_ERR(tcp_ses->hostname)) {
rc = PTR_ERR(tcp_ses->hostname);
- goto out_err;
+ goto out_err_crypto_release;
}
tcp_ses->noblocksnd = volume_info->noblocksnd;
@@ -1651,17 +1716,16 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
cFYI(1, "attempting ipv6 connect");
/* BB should we allow ipv6 on port 139? */
/* other OS never observed in Wild doing 139 with v6 */
- memcpy(&tcp_ses->addr.sockAddr6, sin_server6,
- sizeof(struct sockaddr_in6));
- rc = ipv6_connect(tcp_ses);
- } else {
- memcpy(&tcp_ses->addr.sockAddr, sin_server,
- sizeof(struct sockaddr_in));
- rc = ipv4_connect(tcp_ses);
- }
+ memcpy(&tcp_ses->dstaddr, sin_server6,
+ sizeof(struct sockaddr_in6));
+ } else
+ memcpy(&tcp_ses->dstaddr, sin_server,
+ sizeof(struct sockaddr_in));
+
+ rc = ip_connect(tcp_ses);
if (rc < 0) {
cERROR(1, "Error connecting to socket. Aborting operation");
- goto out_err;
+ goto out_err_crypto_release;
}
/*
@@ -1675,7 +1739,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
rc = PTR_ERR(tcp_ses->tsk);
cERROR(1, "error %d create cifsd thread", rc);
module_put(THIS_MODULE);
- goto out_err;
+ goto out_err_crypto_release;
}
/* thread spawned, put it on the list */
@@ -1687,6 +1751,9 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
return tcp_ses;
+out_err_crypto_release:
+ cifs_crypto_shash_release(tcp_ses);
+
out_err:
if (tcp_ses) {
if (!IS_ERR(tcp_ses->hostname))
@@ -1760,6 +1827,8 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
{
int rc = -ENOMEM, xid;
struct cifsSesInfo *ses;
+ struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr;
+ struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr;
xid = GetXid();
@@ -1801,16 +1870,12 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
if (ses == NULL)
goto get_ses_fail;
- ses->tilen = 0;
- ses->tiblob = NULL;
/* new SMB session uses our server ref */
ses->server = server;
- if (server->addr.sockAddr6.sin6_family == AF_INET6)
- sprintf(ses->serverName, "%pI6",
- &server->addr.sockAddr6.sin6_addr);
+ if (server->dstaddr.ss_family == AF_INET6)
+ sprintf(ses->serverName, "%pI6", &addr6->sin6_addr);
else
- sprintf(ses->serverName, "%pI4",
- &server->addr.sockAddr.sin_addr.s_addr);
+ sprintf(ses->serverName, "%pI4", &addr->sin_addr);
if (volume_info->username)
strncpy(ses->userName, volume_info->username,
@@ -1823,10 +1888,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
goto get_ses_fail;
}
if (volume_info->domainname) {
- int len = strlen(volume_info->domainname);
- ses->domainName = kmalloc(len + 1, GFP_KERNEL);
- if (ses->domainName)
- strcpy(ses->domainName, volume_info->domainname);
+ ses->domainName = kstrdup(volume_info->domainname, GFP_KERNEL);
+ if (!ses->domainName)
+ goto get_ses_fail;
}
ses->cred_uid = volume_info->cred_uid;
ses->linux_uid = volume_info->linux_uid;
@@ -2106,19 +2170,106 @@ bind_socket(struct TCP_Server_Info *server)
}
static int
-ipv4_connect(struct TCP_Server_Info *server)
+ip_rfc1001_connect(struct TCP_Server_Info *server)
+{
+ int rc = 0;
+ /*
+ * some servers require RFC1001 sessinit before sending
+ * negprot - BB check reconnection in case where second
+ * sessinit is sent but no second negprot
+ */
+ struct rfc1002_session_packet *ses_init_buf;
+ struct smb_hdr *smb_buf;
+ ses_init_buf = kzalloc(sizeof(struct rfc1002_session_packet),
+ GFP_KERNEL);
+ if (ses_init_buf) {
+ ses_init_buf->trailer.session_req.called_len = 32;
+
+ if (server->server_RFC1001_name &&
+ server->server_RFC1001_name[0] != 0)
+ rfc1002mangle(ses_init_buf->trailer.
+ session_req.called_name,
+ server->server_RFC1001_name,
+ RFC1001_NAME_LEN_WITH_NULL);
+ else
+ rfc1002mangle(ses_init_buf->trailer.
+ session_req.called_name,
+ DEFAULT_CIFS_CALLED_NAME,
+ RFC1001_NAME_LEN_WITH_NULL);
+
+ ses_init_buf->trailer.session_req.calling_len = 32;
+
+ /*
+ * calling name ends in null (byte 16) from old smb
+ * convention.
+ */
+ if (server->workstation_RFC1001_name &&
+ server->workstation_RFC1001_name[0] != 0)
+ rfc1002mangle(ses_init_buf->trailer.
+ session_req.calling_name,
+ server->workstation_RFC1001_name,
+ RFC1001_NAME_LEN_WITH_NULL);
+ else
+ rfc1002mangle(ses_init_buf->trailer.
+ session_req.calling_name,
+ "LINUX_CIFS_CLNT",
+ RFC1001_NAME_LEN_WITH_NULL);
+
+ ses_init_buf->trailer.session_req.scope1 = 0;
+ ses_init_buf->trailer.session_req.scope2 = 0;
+ smb_buf = (struct smb_hdr *)ses_init_buf;
+
+ /* sizeof RFC1002_SESSION_REQUEST with no scope */
+ smb_buf->smb_buf_length = 0x81000044;
+ rc = smb_send(server, smb_buf, 0x44);
+ kfree(ses_init_buf);
+ /*
+ * RFC1001 layer in at least one server
+ * requires very short break before negprot
+ * presumably because not expecting negprot
+ * to follow so fast. This is a simple
+ * solution that works without
+ * complicating the code and causes no
+ * significant slowing down on mount
+ * for everyone else
+ */
+ usleep_range(1000, 2000);
+ }
+ /*
+ * else the negprot may still work without this
+ * even though malloc failed
+ */
+
+ return rc;
+}
+
+static int
+generic_ip_connect(struct TCP_Server_Info *server)
{
int rc = 0;
- int val;
- bool connected = false;
- __be16 orig_port = 0;
+ unsigned short int sport;
+ int slen, sfamily;
struct socket *socket = server->ssocket;
+ struct sockaddr *saddr;
+
+ saddr = (struct sockaddr *) &server->dstaddr;
+
+ if (server->dstaddr.ss_family == AF_INET6) {
+ sport = ((struct sockaddr_in6 *) saddr)->sin6_port;
+ slen = sizeof(struct sockaddr_in6);
+ sfamily = AF_INET6;
+ } else {
+ sport = ((struct sockaddr_in *) saddr)->sin_port;
+ slen = sizeof(struct sockaddr_in);
+ sfamily = AF_INET;
+ }
if (socket == NULL) {
- rc = sock_create_kern(PF_INET, SOCK_STREAM,
+ rc = sock_create_kern(sfamily, SOCK_STREAM,
IPPROTO_TCP, &socket);
if (rc < 0) {
cERROR(1, "Error %d creating socket", rc);
+ server->ssocket = NULL;
return rc;
}
@@ -2126,63 +2277,28 @@ ipv4_connect(struct TCP_Server_Info *server)
cFYI(1, "Socket created");
server->ssocket = socket;
socket->sk->sk_allocation = GFP_NOFS;
- cifs_reclassify_socket4(socket);
+ if (sfamily == AF_INET6)
+ cifs_reclassify_socket6(socket);
+ else
+ cifs_reclassify_socket4(socket);
}
rc = bind_socket(server);
if (rc < 0)
return rc;
- /* user overrode default port */
- if (server->addr.sockAddr.sin_port) {
- rc = socket->ops->connect(socket, (struct sockaddr *)
- &server->addr.sockAddr,
- sizeof(struct sockaddr_in), 0);
- if (rc >= 0)
- connected = true;
- }
-
- if (!connected) {
- /* save original port so we can retry user specified port
- later if fall back ports fail this time */
- orig_port = server->addr.sockAddr.sin_port;
-
- /* do not retry on the same port we just failed on */
- if (server->addr.sockAddr.sin_port != htons(CIFS_PORT)) {
- server->addr.sockAddr.sin_port = htons(CIFS_PORT);
- rc = socket->ops->connect(socket,
- (struct sockaddr *)
- &server->addr.sockAddr,
- sizeof(struct sockaddr_in), 0);
- if (rc >= 0)
- connected = true;
- }
- }
- if (!connected) {
- server->addr.sockAddr.sin_port = htons(RFC1001_PORT);
- rc = socket->ops->connect(socket, (struct sockaddr *)
- &server->addr.sockAddr,
- sizeof(struct sockaddr_in), 0);
- if (rc >= 0)
- connected = true;
- }
-
- /* give up here - unless we want to retry on different
- protocol families some day */
- if (!connected) {
- if (orig_port)
- server->addr.sockAddr.sin_port = orig_port;
- cFYI(1, "Error %d connecting to server via ipv4", rc);
+ rc = socket->ops->connect(socket, saddr, slen, 0);
+ if (rc < 0) {
+ cFYI(1, "Error %d connecting to server", rc);
sock_release(socket);
server->ssocket = NULL;
return rc;
}
-
/*
* Eventually check for other socket options to change from
- * the default. sock_setsockopt not used because it expects
- * user space buffer
+ * the default. sock_setsockopt not used because it expects
+ * user space buffer
*/
socket->sk->sk_rcvtimeo = 7 * HZ;
socket->sk->sk_sndtimeo = 5 * HZ;
@@ -2196,7 +2312,7 @@ ipv4_connect(struct TCP_Server_Info *server)
}
if (server->tcp_nodelay) {
- val = 1;
+ int val = 1;
rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
(char *)&val, sizeof(val));
if (rc)
@@ -2207,161 +2323,39 @@ ipv4_connect(struct TCP_Server_Info *server)
socket->sk->sk_sndbuf,
socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo);
- /* send RFC1001 sessinit */
- if (server->addr.sockAddr.sin_port == htons(RFC1001_PORT)) {
- /* some servers require RFC1001 sessinit before sending
- negprot - BB check reconnection in case where second
- sessinit is sent but no second negprot */
- struct rfc1002_session_packet *ses_init_buf;
- struct smb_hdr *smb_buf;
- ses_init_buf = kzalloc(sizeof(struct rfc1002_session_packet),
- GFP_KERNEL);
- if (ses_init_buf) {
- ses_init_buf->trailer.session_req.called_len = 32;
- if (server->server_RFC1001_name &&
- server->server_RFC1001_name[0] != 0)
- rfc1002mangle(ses_init_buf->trailer.
- session_req.called_name,
- server->server_RFC1001_name,
- RFC1001_NAME_LEN_WITH_NULL);
- else
- rfc1002mangle(ses_init_buf->trailer.
- session_req.called_name,
- DEFAULT_CIFS_CALLED_NAME,
- RFC1001_NAME_LEN_WITH_NULL);
-
- ses_init_buf->trailer.session_req.calling_len = 32;
-
- /* calling name ends in null (byte 16) from old smb
- convention. */
- if (server->workstation_RFC1001_name &&
- server->workstation_RFC1001_name[0] != 0)
- rfc1002mangle(ses_init_buf->trailer.
- session_req.calling_name,
- server->workstation_RFC1001_name,
- RFC1001_NAME_LEN_WITH_NULL);
- else
- rfc1002mangle(ses_init_buf->trailer.
- session_req.calling_name,
- "LINUX_CIFS_CLNT",
- RFC1001_NAME_LEN_WITH_NULL);
-
- ses_init_buf->trailer.session_req.scope1 = 0;
- ses_init_buf->trailer.session_req.scope2 = 0;
- smb_buf = (struct smb_hdr *)ses_init_buf;
- /* sizeof RFC1002_SESSION_REQUEST with no scope */
- smb_buf->smb_buf_length = 0x81000044;
- rc = smb_send(server, smb_buf, 0x44);
- kfree(ses_init_buf);
- msleep(1); /* RFC1001 layer in at least one server
- requires very short break before negprot
- presumably because not expecting negprot
- to follow so fast. This is a simple
- solution that works without
- complicating the code and causes no
- significant slowing down on mount
- for everyone else */
- }
- /* else the negprot may still work without this
- even though malloc failed */
-
- }
+ if (sport == htons(RFC1001_PORT))
+ rc = ip_rfc1001_connect(server);
return rc;
}
static int
-ipv6_connect(struct TCP_Server_Info *server)
+ip_connect(struct TCP_Server_Info *server)
{
- int rc = 0;
- int val;
- bool connected = false;
- __be16 orig_port = 0;
- struct socket *socket = server->ssocket;
+ unsigned short int *sport;
+ struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr;
+ struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr;
- if (socket == NULL) {
- rc = sock_create_kern(PF_INET6, SOCK_STREAM,
- IPPROTO_TCP, &socket);
- if (rc < 0) {
- cERROR(1, "Error %d creating ipv6 socket", rc);
- socket = NULL;
- return rc;
- }
+ if (server->dstaddr.ss_family == AF_INET6)
+ sport = &addr6->sin6_port;
+ else
+ sport = &addr->sin_port;
- /* BB other socket options to set KEEPALIVE, NODELAY? */
- cFYI(1, "ipv6 Socket created");
- server->ssocket = socket;
- socket->sk->sk_allocation = GFP_NOFS;
- cifs_reclassify_socket6(socket);
- }
+ if (*sport == 0) {
+ int rc;
- rc = bind_socket(server);
- if (rc < 0)
- return rc;
+ /* try with 445 port at first */
+ *sport = htons(CIFS_PORT);
- /* user overrode default port */
- if (server->addr.sockAddr6.sin6_port) {
- rc = socket->ops->connect(socket,
- (struct sockaddr *) &server->addr.sockAddr6,
- sizeof(struct sockaddr_in6), 0);
+ rc = generic_ip_connect(server);
if (rc >= 0)
- connected = true;
- }
-
- if (!connected) {
- /* save original port so we can retry user specified port
- later if fall back ports fail this time */
-
- orig_port = server->addr.sockAddr6.sin6_port;
- /* do not retry on the same port we just failed on */
- if (server->addr.sockAddr6.sin6_port != htons(CIFS_PORT)) {
- server->addr.sockAddr6.sin6_port = htons(CIFS_PORT);
- rc = socket->ops->connect(socket, (struct sockaddr *)
- &server->addr.sockAddr6,
- sizeof(struct sockaddr_in6), 0);
- if (rc >= 0)
- connected = true;
- }
- }
- if (!connected) {
- server->addr.sockAddr6.sin6_port = htons(RFC1001_PORT);
- rc = socket->ops->connect(socket, (struct sockaddr *)
- &server->addr.sockAddr6,
- sizeof(struct sockaddr_in6), 0);
- if (rc >= 0)
- connected = true;
- }
-
- /* give up here - unless we want to retry on different
- protocol families some day */
- if (!connected) {
- if (orig_port)
- server->addr.sockAddr6.sin6_port = orig_port;
- cFYI(1, "Error %d connecting to server via ipv6", rc);
- sock_release(socket);
- server->ssocket = NULL;
- return rc;
- }
-
- /*
- * Eventually check for other socket options to change from
- * the default. sock_setsockopt not used because it expects
- * user space buffer
- */
- socket->sk->sk_rcvtimeo = 7 * HZ;
- socket->sk->sk_sndtimeo = 5 * HZ;
+ return rc;
- if (server->tcp_nodelay) {
- val = 1;
- rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
- (char *)&val, sizeof(val));
- if (rc)
- cFYI(1, "set TCP_NODELAY socket option error %d", rc);
+ /* if it failed, try with 139 port */
+ *sport = htons(RFC1001_PORT);
}
- server->ssocket = socket;
-
- return rc;
+ return generic_ip_connect(server);
}
void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
@@ -2551,6 +2545,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
cFYI(1, "file mode: 0x%x dir mode: 0x%x",
cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode);
+ cifs_sb->actimeo = pvolume_info->actimeo;
+
if (pvolume_info->noperm)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
if (pvolume_info->setuids)
@@ -2801,13 +2797,13 @@ remote_path_check:
/* check if a whole path (including prepath) is not remote */
if (!rc && cifs_sb->prepathlen && tcon) {
/* build_path_to_root works only when we have a valid tcon */
- full_path = cifs_build_path_to_root(cifs_sb);
+ full_path = cifs_build_path_to_root(cifs_sb, tcon);
if (full_path == NULL) {
rc = -ENOMEM;
goto mount_fail_check;
}
rc = is_path_accessible(xid, tcon, cifs_sb, full_path);
- if (rc != -EREMOTE) {
+ if (rc != 0 && rc != -EREMOTE) {
kfree(full_path);
goto mount_fail_check;
}
@@ -2886,24 +2882,16 @@ remote_path_check:
goto mount_fail_check;
}
- tlink->tl_index = pSesInfo->linux_uid;
+ tlink->tl_uid = pSesInfo->linux_uid;
tlink->tl_tcon = tcon;
tlink->tl_time = jiffies;
set_bit(TCON_LINK_MASTER, &tlink->tl_flags);
set_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
- rc = radix_tree_preload(GFP_KERNEL);
- if (rc == -ENOMEM) {
- kfree(tlink);
- goto mount_fail_check;
- }
-
+ cifs_sb->master_tlink = tlink;
spin_lock(&cifs_sb->tlink_tree_lock);
- radix_tree_insert(&cifs_sb->tlink_tree, pSesInfo->linux_uid, tlink);
- radix_tree_tag_set(&cifs_sb->tlink_tree, pSesInfo->linux_uid,
- CIFS_TLINK_MASTER_TAG);
+ tlink_rb_insert(&cifs_sb->tlink_tree, tlink);
spin_unlock(&cifs_sb->tlink_tree_lock);
- radix_tree_preload_end();
queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks,
TLINK_IDLE_EXPIRE);
@@ -2985,13 +2973,13 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
#ifdef CONFIG_CIFS_WEAK_PW_HASH
if ((global_secflags & CIFSSEC_MAY_LANMAN) &&
(ses->server->secType == LANMAN))
- calc_lanman_hash(tcon->password, ses->cryptKey,
+ calc_lanman_hash(tcon->password, ses->server->cryptkey,
ses->server->secMode &
SECMODE_PW_ENCRYPT ? true : false,
bcc_ptr);
else
#endif /* CIFS_WEAK_PW_HASH */
- SMBNTencrypt(tcon->password, ses->cryptKey, bcc_ptr);
+ SMBNTencrypt(tcon->password, ses->server->cryptkey, bcc_ptr);
bcc_ptr += CIFS_SESS_KEY_SIZE;
if (ses->capabilities & CAP_UNICODE) {
@@ -3093,32 +3081,25 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
int
cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
{
- int i, ret;
+ struct rb_root *root = &cifs_sb->tlink_tree;
+ struct rb_node *node;
+ struct tcon_link *tlink;
char *tmp;
- struct tcon_link *tlink[8];
- unsigned long index = 0;
cancel_delayed_work_sync(&cifs_sb->prune_tlinks);
- do {
- spin_lock(&cifs_sb->tlink_tree_lock);
- ret = radix_tree_gang_lookup(&cifs_sb->tlink_tree,
- (void **)tlink, index,
- ARRAY_SIZE(tlink));
- /* increment index for next pass */
- if (ret > 0)
- index = tlink[ret - 1]->tl_index + 1;
- for (i = 0; i < ret; i++) {
- cifs_get_tlink(tlink[i]);
- clear_bit(TCON_LINK_IN_TREE, &tlink[i]->tl_flags);
- radix_tree_delete(&cifs_sb->tlink_tree,
- tlink[i]->tl_index);
- }
- spin_unlock(&cifs_sb->tlink_tree_lock);
+ spin_lock(&cifs_sb->tlink_tree_lock);
+ while ((node = rb_first(root))) {
+ tlink = rb_entry(node, struct tcon_link, tl_rbnode);
+ cifs_get_tlink(tlink);
+ clear_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
+ rb_erase(node, root);
- for (i = 0; i < ret; i++)
- cifs_put_tlink(tlink[i]);
- } while (ret != 0);
+ spin_unlock(&cifs_sb->tlink_tree_lock);
+ cifs_put_tlink(tlink);
+ spin_lock(&cifs_sb->tlink_tree_lock);
+ }
+ spin_unlock(&cifs_sb->tlink_tree_lock);
tmp = cifs_sb->prepath;
cifs_sb->prepathlen = 0;
@@ -3178,10 +3159,11 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
} else {
mutex_lock(&ses->server->srv_mutex);
if (!server->session_estab) {
- memcpy(&server->session_key.data,
- &ses->auth_key.data, ses->auth_key.len);
+ server->session_key.response = ses->auth_key.response;
server->session_key.len = ses->auth_key.len;
- ses->server->session_estab = true;
+ server->sequence_number = 0x2;
+ server->session_estab = true;
+ ses->auth_key.response = NULL;
}
mutex_unlock(&server->srv_mutex);
@@ -3192,6 +3174,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
spin_unlock(&GlobalMid_Lock);
}
+ kfree(ses->auth_key.response);
+ ses->auth_key.response = NULL;
+ ses->auth_key.len = 0;
+ kfree(ses->ntlmssp);
+ ses->ntlmssp = NULL;
+
return rc;
}
@@ -3250,22 +3238,10 @@ out:
return tcon;
}
-static struct tcon_link *
+static inline struct tcon_link *
cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb)
{
- struct tcon_link *tlink;
- unsigned int ret;
-
- spin_lock(&cifs_sb->tlink_tree_lock);
- ret = radix_tree_gang_lookup_tag(&cifs_sb->tlink_tree, (void **)&tlink,
- 0, 1, CIFS_TLINK_MASTER_TAG);
- spin_unlock(&cifs_sb->tlink_tree_lock);
-
- /* the master tcon should always be present */
- if (ret == 0)
- BUG();
-
- return tlink;
+ return cifs_sb->master_tlink;
}
struct cifsTconInfo *
@@ -3281,6 +3257,47 @@ cifs_sb_tcon_pending_wait(void *unused)
return signal_pending(current) ? -ERESTARTSYS : 0;
}
+/* find and return a tlink with given uid */
+static struct tcon_link *
+tlink_rb_search(struct rb_root *root, uid_t uid)
+{
+ struct rb_node *node = root->rb_node;
+ struct tcon_link *tlink;
+
+ while (node) {
+ tlink = rb_entry(node, struct tcon_link, tl_rbnode);
+
+ if (tlink->tl_uid > uid)
+ node = node->rb_left;
+ else if (tlink->tl_uid < uid)
+ node = node->rb_right;
+ else
+ return tlink;
+ }
+ return NULL;
+}
+
+/* insert a tcon_link into the tree */
+static void
+tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+ struct tcon_link *tlink;
+
+ while (*new) {
+ tlink = rb_entry(*new, struct tcon_link, tl_rbnode);
+ parent = *new;
+
+ if (tlink->tl_uid > new_tlink->tl_uid)
+ new = &((*new)->rb_left);
+ else
+ new = &((*new)->rb_right);
+ }
+
+ rb_link_node(&new_tlink->tl_rbnode, parent, new);
+ rb_insert_color(&new_tlink->tl_rbnode, root);
+}
+
/*
* Find or construct an appropriate tcon given a cifs_sb and the fsuid of the
* current task.
@@ -3288,7 +3305,7 @@ cifs_sb_tcon_pending_wait(void *unused)
* If the superblock doesn't refer to a multiuser mount, then just return
* the master tcon for the mount.
*
- * First, search the radix tree for an existing tcon for this fsuid. If one
+ * First, search the rbtree for an existing tcon for this fsuid. If one
* exists, then check to see if it's pending construction. If it is then wait
* for construction to complete. Once it's no longer pending, check to see if
* it failed and either return an error or retry construction, depending on
@@ -3301,14 +3318,14 @@ struct tcon_link *
cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
{
int ret;
- unsigned long fsuid = (unsigned long) current_fsuid();
+ uid_t fsuid = current_fsuid();
struct tcon_link *tlink, *newtlink;
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
return cifs_get_tlink(cifs_sb_master_tlink(cifs_sb));
spin_lock(&cifs_sb->tlink_tree_lock);
- tlink = radix_tree_lookup(&cifs_sb->tlink_tree, fsuid);
+ tlink = tlink_rb_search(&cifs_sb->tlink_tree, fsuid);
if (tlink)
cifs_get_tlink(tlink);
spin_unlock(&cifs_sb->tlink_tree_lock);
@@ -3317,36 +3334,24 @@ cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
newtlink = kzalloc(sizeof(*tlink), GFP_KERNEL);
if (newtlink == NULL)
return ERR_PTR(-ENOMEM);
- newtlink->tl_index = fsuid;
+ newtlink->tl_uid = fsuid;
newtlink->tl_tcon = ERR_PTR(-EACCES);
set_bit(TCON_LINK_PENDING, &newtlink->tl_flags);
set_bit(TCON_LINK_IN_TREE, &newtlink->tl_flags);
cifs_get_tlink(newtlink);
- ret = radix_tree_preload(GFP_KERNEL);
- if (ret != 0) {
- kfree(newtlink);
- return ERR_PTR(ret);
- }
-
spin_lock(&cifs_sb->tlink_tree_lock);
/* was one inserted after previous search? */
- tlink = radix_tree_lookup(&cifs_sb->tlink_tree, fsuid);
+ tlink = tlink_rb_search(&cifs_sb->tlink_tree, fsuid);
if (tlink) {
cifs_get_tlink(tlink);
spin_unlock(&cifs_sb->tlink_tree_lock);
- radix_tree_preload_end();
kfree(newtlink);
goto wait_for_construction;
}
- ret = radix_tree_insert(&cifs_sb->tlink_tree, fsuid, newtlink);
- spin_unlock(&cifs_sb->tlink_tree_lock);
- radix_tree_preload_end();
- if (ret) {
- kfree(newtlink);
- return ERR_PTR(ret);
- }
tlink = newtlink;
+ tlink_rb_insert(&cifs_sb->tlink_tree, tlink);
+ spin_unlock(&cifs_sb->tlink_tree_lock);
} else {
wait_for_construction:
ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING,
@@ -3392,39 +3397,39 @@ cifs_prune_tlinks(struct work_struct *work)
{
struct cifs_sb_info *cifs_sb = container_of(work, struct cifs_sb_info,
prune_tlinks.work);
- struct tcon_link *tlink[8];
- unsigned long now = jiffies;
- unsigned long index = 0;
- int i, ret;
+ struct rb_root *root = &cifs_sb->tlink_tree;
+ struct rb_node *node = rb_first(root);
+ struct rb_node *tmp;
+ struct tcon_link *tlink;
- do {
- spin_lock(&cifs_sb->tlink_tree_lock);
- ret = radix_tree_gang_lookup(&cifs_sb->tlink_tree,
- (void **)tlink, index,
- ARRAY_SIZE(tlink));
- /* increment index for next pass */
- if (ret > 0)
- index = tlink[ret - 1]->tl_index + 1;
- for (i = 0; i < ret; i++) {
- if (test_bit(TCON_LINK_MASTER, &tlink[i]->tl_flags) ||
- atomic_read(&tlink[i]->tl_count) != 0 ||
- time_after(tlink[i]->tl_time + TLINK_IDLE_EXPIRE,
- now)) {
- tlink[i] = NULL;
- continue;
- }
- cifs_get_tlink(tlink[i]);
- clear_bit(TCON_LINK_IN_TREE, &tlink[i]->tl_flags);
- radix_tree_delete(&cifs_sb->tlink_tree,
- tlink[i]->tl_index);
- }
- spin_unlock(&cifs_sb->tlink_tree_lock);
+ /*
+ * Because we drop the spinlock in the loop in order to put the tlink
+ * it's not guarded against removal of links from the tree. The only
+ * places that remove entries from the tree are this function and
+ * umounts. Because this function is non-reentrant and is canceled
+ * before umount can proceed, this is safe.
+ */
+ spin_lock(&cifs_sb->tlink_tree_lock);
+ node = rb_first(root);
+ while (node != NULL) {
+ tmp = node;
+ node = rb_next(tmp);
+ tlink = rb_entry(tmp, struct tcon_link, tl_rbnode);
+
+ if (test_bit(TCON_LINK_MASTER, &tlink->tl_flags) ||
+ atomic_read(&tlink->tl_count) != 0 ||
+ time_after(tlink->tl_time + TLINK_IDLE_EXPIRE, jiffies))
+ continue;
- for (i = 0; i < ret; i++) {
- if (tlink[i] != NULL)
- cifs_put_tlink(tlink[i]);
- }
- } while (ret != 0);
+ cifs_get_tlink(tlink);
+ clear_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
+ rb_erase(tmp, root);
+
+ spin_unlock(&cifs_sb->tlink_tree_lock);
+ cifs_put_tlink(tlink);
+ spin_lock(&cifs_sb->tlink_tree_lock);
+ }
+ spin_unlock(&cifs_sb->tlink_tree_lock);
queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks,
TLINK_IDLE_EXPIRE);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 3840edd..2e77382 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -135,9 +135,9 @@ static void setup_cifs_dentry(struct cifsTconInfo *tcon,
struct inode *newinode)
{
if (tcon->nocase)
- direntry->d_op = &cifs_ci_dentry_ops;
+ d_set_d_op(direntry, &cifs_ci_dentry_ops);
else
- direntry->d_op = &cifs_dentry_ops;
+ d_set_d_op(direntry, &cifs_dentry_ops);
d_instantiate(direntry, newinode);
}
@@ -293,10 +293,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
args.uid = NO_CHANGE_64;
args.gid = NO_CHANGE_64;
}
- CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ CIFSSMBUnixSetFileInfo(xid, tcon, &args, fileHandle,
+ current->tgid);
} else {
/* BB implement mode setting via Windows security
descriptors e.g. */
@@ -421,9 +419,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
rc = cifs_get_inode_info_unix(&newinode, full_path,
inode->i_sb, xid);
if (pTcon->nocase)
- direntry->d_op = &cifs_ci_dentry_ops;
+ d_set_d_op(direntry, &cifs_ci_dentry_ops);
else
- direntry->d_op = &cifs_dentry_ops;
+ d_set_d_op(direntry, &cifs_dentry_ops);
if (rc == 0)
d_instantiate(direntry, newinode);
@@ -604,9 +602,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
if ((rc == 0) && (newInode != NULL)) {
if (pTcon->nocase)
- direntry->d_op = &cifs_ci_dentry_ops;
+ d_set_d_op(direntry, &cifs_ci_dentry_ops);
else
- direntry->d_op = &cifs_dentry_ops;
+ d_set_d_op(direntry, &cifs_dentry_ops);
d_add(direntry, newInode);
if (posix_open) {
filp = lookup_instantiate_filp(nd, direntry,
@@ -634,9 +632,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
rc = 0;
direntry->d_time = jiffies;
if (pTcon->nocase)
- direntry->d_op = &cifs_ci_dentry_ops;
+ d_set_d_op(direntry, &cifs_ci_dentry_ops);
else
- direntry->d_op = &cifs_dentry_ops;
+ d_set_d_op(direntry, &cifs_dentry_ops);
d_add(direntry, NULL);
/* if it was once a directory (but how can we tell?) we could do
shrink_dcache_parent(direntry); */
@@ -656,22 +654,37 @@ lookup_out:
static int
cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
{
- int isValid = 1;
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
if (direntry->d_inode) {
if (cifs_revalidate_dentry(direntry))
return 0;
- } else {
- cFYI(1, "neg dentry 0x%p name = %s",
- direntry, direntry->d_name.name);
- if (time_after(jiffies, direntry->d_time + HZ) ||
- !lookupCacheEnabled) {
- d_drop(direntry);
- isValid = 0;
- }
+ else
+ return 1;
+ }
+
+ /*
+ * This may be nfsd (or something), anyway, we can't see the
+ * intent of this. So, since this can be for creation, drop it.
+ */
+ if (!nd)
+ return 0;
+
+ /*
+ * Drop the negative dentry, in order to make sure to use the
+ * case sensitive name which is specified by user if this is
+ * for creation.
+ */
+ if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
+ if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
+ return 0;
}
- return isValid;
+ if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled)
+ return 0;
+
+ return 1;
}
/* static int cifs_d_delete(struct dentry *direntry)
@@ -688,9 +701,10 @@ const struct dentry_operations cifs_dentry_ops = {
/* d_delete: cifs_d_delete, */ /* not needed except for debugging */
};
-static int cifs_ci_hash(struct dentry *dentry, struct qstr *q)
+static int cifs_ci_hash(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *q)
{
- struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls;
+ struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls;
unsigned long hash;
int i;
@@ -703,21 +717,16 @@ static int cifs_ci_hash(struct dentry *dentry, struct qstr *q)
return 0;
}
-static int cifs_ci_compare(struct dentry *dentry, struct qstr *a,
- struct qstr *b)
+static int cifs_ci_compare(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls;
-
- if ((a->len == b->len) &&
- (nls_strnicmp(codepage, a->name, b->name, a->len) == 0)) {
- /*
- * To preserve case, don't let an existing negative dentry's
- * case take precedence. If a is not a negative dentry, this
- * should have no side effects
- */
- memcpy((void *)a->name, b->name, a->len);
+ struct nls_table *codepage = CIFS_SB(pinode->i_sb)->local_nls;
+
+ if ((name->len == len) &&
+ (nls_strnicmp(codepage, name->name, str, len) == 0))
return 0;
- }
return 1;
}
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 0eb8702..548f062 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -66,7 +66,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
/* Search for server name delimiter */
sep = memchr(hostname, '\\', len);
if (sep)
- len = sep - unc;
+ len = sep - hostname;
else
cFYI(1, "%s: probably server name is whole unc: %s",
__func__, unc);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 45af003..d843631 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -104,59 +104,6 @@ static inline int cifs_get_disposition(unsigned int flags)
return FILE_OPEN;
}
-static inline int cifs_open_inode_helper(struct inode *inode,
- struct cifsTconInfo *pTcon, __u32 oplock, FILE_ALL_INFO *buf,
- char *full_path, int xid)
-{
- struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
- struct timespec temp;
- int rc;
-
- if (pCifsInode->clientCanCacheRead) {
- /* we have the inode open somewhere else
- no need to discard cache data */
- goto client_can_cache;
- }
-
- /* BB need same check in cifs_create too? */
- /* if not oplocked, invalidate inode pages if mtime or file
- size changed */
- temp = cifs_NTtimeToUnix(buf->LastWriteTime);
- if (timespec_equal(&inode->i_mtime, &temp) &&
- (inode->i_size ==
- (loff_t)le64_to_cpu(buf->EndOfFile))) {
- cFYI(1, "inode unchanged on server");
- } else {
- if (inode->i_mapping) {
- /* BB no need to lock inode until after invalidate
- since namei code should already have it locked? */
- rc = filemap_write_and_wait(inode->i_mapping);
- if (rc != 0)
- pCifsInode->write_behind_rc = rc;
- }
- cFYI(1, "invalidating remote inode since open detected it "
- "changed");
- invalidate_remote_inode(inode);
- }
-
-client_can_cache:
- if (pTcon->unix_ext)
- rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
- xid);
- else
- rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
- xid, NULL);
-
- if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
- pCifsInode->clientCanCacheAll = true;
- pCifsInode->clientCanCacheRead = true;
- cFYI(1, "Exclusive Oplock granted on inode %p", inode);
- } else if ((oplock & 0xF) == OPLOCK_READ)
- pCifsInode->clientCanCacheRead = true;
-
- return rc;
-}
-
int cifs_posix_open(char *full_path, struct inode **pinode,
struct super_block *sb, int mode, unsigned int f_flags,
__u32 *poplock, __u16 *pnetfid, int xid)
@@ -219,6 +166,76 @@ posix_open_ret:
return rc;
}
+static int
+cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
+ struct cifsTconInfo *tcon, unsigned int f_flags, __u32 *poplock,
+ __u16 *pnetfid, int xid)
+{
+ int rc;
+ int desiredAccess;
+ int disposition;
+ FILE_ALL_INFO *buf;
+
+ desiredAccess = cifs_convert_flags(f_flags);
+
+/*********************************************************************
+ * open flag mapping table:
+ *
+ * POSIX Flag CIFS Disposition
+ * ---------- ----------------
+ * O_CREAT FILE_OPEN_IF
+ * O_CREAT | O_EXCL FILE_CREATE
+ * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
+ * O_TRUNC FILE_OVERWRITE
+ * none of the above FILE_OPEN
+ *
+ * Note that there is not a direct match between disposition
+ * FILE_SUPERSEDE (ie create whether or not file exists although
+ * O_CREAT | O_TRUNC is similar but truncates the existing
+ * file rather than creating a new file as FILE_SUPERSEDE does
+ * (which uses the attributes / metadata passed in on open call)
+ *?
+ *? O_SYNC is a reasonable match to CIFS writethrough flag
+ *? and the read write flags match reasonably. O_LARGEFILE
+ *? is irrelevant because largefile support is always used
+ *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
+ * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
+ *********************************************************************/
+
+ disposition = cifs_get_disposition(f_flags);
+
+ /* BB pass O_SYNC flag through on file attributes .. BB */
+
+ buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ if (tcon->ses->capabilities & CAP_NT_SMBS)
+ rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
+ desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
+ & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ else
+ rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
+ desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
+ & CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+ if (rc)
+ goto out;
+
+ if (tcon->unix_ext)
+ rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
+ xid);
+ else
+ rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
+ xid, pnetfid);
+
+out:
+ kfree(buf);
+ return rc;
+}
+
struct cifsFileInfo *
cifs_new_fileinfo(__u16 fileHandle, struct file *file,
struct tcon_link *tlink, __u32 oplock)
@@ -232,6 +249,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
if (pCifsFile == NULL)
return pCifsFile;
+ pCifsFile->count = 1;
pCifsFile->netfid = fileHandle;
pCifsFile->pid = current->tgid;
pCifsFile->uid = current_fsuid();
@@ -242,7 +260,6 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
mutex_init(&pCifsFile->fh_mutex);
mutex_init(&pCifsFile->lock_mutex);
INIT_LIST_HEAD(&pCifsFile->llist);
- atomic_set(&pCifsFile->count, 1);
INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
spin_lock(&cifs_file_list_lock);
@@ -254,12 +271,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
spin_unlock(&cifs_file_list_lock);
- if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
- pCifsInode->clientCanCacheAll = true;
- pCifsInode->clientCanCacheRead = true;
- cFYI(1, "Exclusive Oplock inode %p", inode);
- } else if ((oplock & 0xF) == OPLOCK_READ)
- pCifsInode->clientCanCacheRead = true;
+ cifs_set_oplock_level(pCifsInode, oplock);
file->private_data = pCifsFile;
return pCifsFile;
@@ -267,16 +279,18 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
/*
* Release a reference on the file private data. This may involve closing
- * the filehandle out on the server.
+ * the filehandle out on the server. Must be called without holding
+ * cifs_file_list_lock.
*/
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
+ struct inode *inode = cifs_file->dentry->d_inode;
struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink);
- struct cifsInodeInfo *cifsi = CIFS_I(cifs_file->dentry->d_inode);
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct cifsLockInfo *li, *tmp;
spin_lock(&cifs_file_list_lock);
- if (!atomic_dec_and_test(&cifs_file->count)) {
+ if (--cifs_file->count > 0) {
spin_unlock(&cifs_file_list_lock);
return;
}
@@ -288,8 +302,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
if (list_empty(&cifsi->openFileList)) {
cFYI(1, "closing last open instance for inode %p",
cifs_file->dentry->d_inode);
- cifsi->clientCanCacheRead = false;
- cifsi->clientCanCacheAll = false;
+ cifs_set_oplock_level(cifsi, 0);
}
spin_unlock(&cifs_file_list_lock);
@@ -327,10 +340,8 @@ int cifs_open(struct inode *inode, struct file *file)
struct cifsFileInfo *pCifsFile = NULL;
struct cifsInodeInfo *pCifsInode;
char *full_path = NULL;
- int desiredAccess;
- int disposition;
+ bool posix_open_ok = false;
__u16 netfid;
- FILE_ALL_INFO *buf = NULL;
xid = GetXid();
@@ -368,17 +379,7 @@ int cifs_open(struct inode *inode, struct file *file)
file->f_flags, &oplock, &netfid, xid);
if (rc == 0) {
cFYI(1, "posix open succeeded");
-
- pCifsFile = cifs_new_fileinfo(netfid, file, tlink,
- oplock);
- if (pCifsFile == NULL) {
- CIFSSMBClose(xid, tcon, netfid);
- rc = -ENOMEM;
- }
-
- cifs_fscache_set_inode_cookie(inode, file);
-
- goto out;
+ posix_open_ok = true;
} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
if (tcon->ses->serverNOS)
cERROR(1, "server %s of type %s returned"
@@ -395,103 +396,39 @@ int cifs_open(struct inode *inode, struct file *file)
or DFS errors */
}
- desiredAccess = cifs_convert_flags(file->f_flags);
-
-/*********************************************************************
- * open flag mapping table:
- *
- * POSIX Flag CIFS Disposition
- * ---------- ----------------
- * O_CREAT FILE_OPEN_IF
- * O_CREAT | O_EXCL FILE_CREATE
- * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
- * O_TRUNC FILE_OVERWRITE
- * none of the above FILE_OPEN
- *
- * Note that there is not a direct match between disposition
- * FILE_SUPERSEDE (ie create whether or not file exists although
- * O_CREAT | O_TRUNC is similar but truncates the existing
- * file rather than creating a new file as FILE_SUPERSEDE does
- * (which uses the attributes / metadata passed in on open call)
- *?
- *? O_SYNC is a reasonable match to CIFS writethrough flag
- *? and the read write flags match reasonably. O_LARGEFILE
- *? is irrelevant because largefile support is always used
- *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
- * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
- *********************************************************************/
-
- disposition = cifs_get_disposition(file->f_flags);
-
- /* BB pass O_SYNC flag through on file attributes .. BB */
-
- /* Also refresh inode by passing in file_info buf returned by SMBOpen
- and calling get_inode_info with returned buf (at least helps
- non-Unix server case) */
-
- /* BB we can not do this if this is the second open of a file
- and the first handle has writebehind data, we might be
- able to simply do a filemap_fdatawrite/filemap_fdatawait first */
- buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
- if (!buf) {
- rc = -ENOMEM;
- goto out;
- }
-
- if (tcon->ses->capabilities & CAP_NT_SMBS)
- rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
- desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
- cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
- & CIFS_MOUNT_MAP_SPECIAL_CHR);
- else
- rc = -EIO; /* no NT SMB support fall into legacy open below */
-
- if (rc == -EIO) {
- /* Old server, try legacy style OpenX */
- rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
- desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
- cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
- & CIFS_MOUNT_MAP_SPECIAL_CHR);
- }
- if (rc) {
- cFYI(1, "cifs_open returned 0x%x", rc);
- goto out;
+ if (!posix_open_ok) {
+ rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
+ file->f_flags, &oplock, &netfid, xid);
+ if (rc)
+ goto out;
}
- rc = cifs_open_inode_helper(inode, tcon, oplock, buf, full_path, xid);
- if (rc != 0)
- goto out;
-
pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock);
if (pCifsFile == NULL) {
+ CIFSSMBClose(xid, tcon, netfid);
rc = -ENOMEM;
goto out;
}
cifs_fscache_set_inode_cookie(inode, file);
- if (oplock & CIFS_CREATE_ACTION) {
+ if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
/* time to set mode which we can not set earlier due to
problems creating new read-only files */
- if (tcon->unix_ext) {
- struct cifs_unix_set_info_args args = {
- .mode = inode->i_mode,
- .uid = NO_CHANGE_64,
- .gid = NO_CHANGE_64,
- .ctime = NO_CHANGE_64,
- .atime = NO_CHANGE_64,
- .mtime = NO_CHANGE_64,
- .device = 0,
- };
- CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- }
+ struct cifs_unix_set_info_args args = {
+ .mode = inode->i_mode,
+ .uid = NO_CHANGE_64,
+ .gid = NO_CHANGE_64,
+ .ctime = NO_CHANGE_64,
+ .atime = NO_CHANGE_64,
+ .mtime = NO_CHANGE_64,
+ .device = 0,
+ };
+ CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid,
+ pCifsFile->pid);
}
out:
- kfree(buf);
kfree(full_path);
FreeXid(xid);
cifs_put_tlink(tlink);
@@ -605,11 +542,8 @@ reopen_success:
if (can_flush) {
rc = filemap_write_and_wait(inode->i_mapping);
- if (rc != 0)
- CIFS_I(inode)->write_behind_rc = rc;
+ mapping_set_error(inode->i_mapping, rc);
- pCifsInode->clientCanCacheAll = false;
- pCifsInode->clientCanCacheRead = false;
if (tcon->unix_ext)
rc = cifs_get_inode_info_unix(&inode,
full_path, inode->i_sb, xid);
@@ -623,18 +557,9 @@ reopen_success:
invalidate the current end of file on the server
we can not go to the server to get the new inod
info */
- if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
- pCifsInode->clientCanCacheAll = true;
- pCifsInode->clientCanCacheRead = true;
- cFYI(1, "Exclusive Oplock granted on inode %p",
- pCifsFile->dentry->d_inode);
- } else if ((oplock & 0xF) == OPLOCK_READ) {
- pCifsInode->clientCanCacheRead = true;
- pCifsInode->clientCanCacheAll = false;
- } else {
- pCifsInode->clientCanCacheRead = false;
- pCifsInode->clientCanCacheAll = false;
- }
+
+ cifs_set_oplock_level(pCifsInode, oplock);
+
cifs_relock_file(pCifsFile);
reopen_error_exit:
@@ -776,12 +701,6 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
tcon = tlink_tcon(((struct cifsFileInfo *)file->private_data)->tlink);
-
- if (file->private_data == NULL) {
- rc = -EBADF;
- FreeXid(xid);
- return rc;
- }
netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
if ((tcon->ses->capabilities & CAP_UNIX) &&
@@ -957,6 +876,7 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
ssize_t cifs_user_write(struct file *file, const char __user *write_data,
size_t write_size, loff_t *poffset)
{
+ struct inode *inode = file->f_path.dentry->d_inode;
int rc = 0;
unsigned int bytes_written = 0;
unsigned int total_written;
@@ -964,7 +884,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
struct cifsTconInfo *pTcon;
int xid, long_op;
struct cifsFileInfo *open_file;
- struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
@@ -1030,21 +950,17 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
cifs_stats_bytes_written(pTcon, total_written);
- /* since the write may have blocked check these pointers again */
- if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
- struct inode *inode = file->f_path.dentry->d_inode;
/* Do not update local mtime - server will set its actual value on write
- * inode->i_ctime = inode->i_mtime =
- * current_fs_time(inode->i_sb);*/
- if (total_written > 0) {
- spin_lock(&inode->i_lock);
- if (*poffset > file->f_path.dentry->d_inode->i_size)
- i_size_write(file->f_path.dentry->d_inode,
- *poffset);
- spin_unlock(&inode->i_lock);
- }
- mark_inode_dirty_sync(file->f_path.dentry->d_inode);
+ * inode->i_ctime = inode->i_mtime =
+ * current_fs_time(inode->i_sb);*/
+ if (total_written > 0) {
+ spin_lock(&inode->i_lock);
+ if (*poffset > inode->i_size)
+ i_size_write(inode, *poffset);
+ spin_unlock(&inode->i_lock);
}
+ mark_inode_dirty_sync(inode);
+
FreeXid(xid);
return total_written;
}
@@ -1139,7 +1055,6 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file,
return total_written;
}
-#ifdef CONFIG_CIFS_EXPERIMENTAL
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
bool fsuid_only)
{
@@ -1173,13 +1088,12 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
spin_unlock(&cifs_file_list_lock);
return NULL;
}
-#endif
struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
bool fsuid_only)
{
struct cifsFileInfo *open_file;
- struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+ struct cifs_sb_info *cifs_sb;
bool any_available = false;
int rc;
@@ -1193,6 +1107,8 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
return NULL;
}
+ cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+
/* only filter by fsuid on multiuser mounts */
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
fsuid_only = false;
@@ -1353,6 +1269,7 @@ static int cifs_writepages(struct address_space *mapping,
if (!experimEnabled && tcon->ses->server->secMode &
(SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
cifsFileInfo_put(open_file);
+ kfree(iov);
return generic_writepages(mapping, wbc);
}
cifsFileInfo_put(open_file);
@@ -1478,12 +1395,7 @@ retry:
if (rc || bytes_written < bytes_to_write) {
cERROR(1, "Write2 ret %d, wrote %d",
rc, bytes_written);
- /* BB what if continued retry is
- requested via mount flags? */
- if (rc == -ENOSPC)
- set_bit(AS_ENOSPC, &mapping->flags);
- else
- set_bit(AS_EIO, &mapping->flags);
+ mapping_set_error(mapping, rc);
} else {
cifs_stats_bytes_written(tcon, bytes_written);
}
@@ -1628,11 +1540,10 @@ int cifs_fsync(struct file *file, int datasync)
rc = filemap_write_and_wait(inode->i_mapping);
if (rc == 0) {
- rc = CIFS_I(inode)->write_behind_rc;
- CIFS_I(inode)->write_behind_rc = 0;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
tcon = tlink_tcon(smbfile->tlink);
- if (!rc && tcon && smbfile &&
- !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
}
@@ -1677,21 +1588,8 @@ int cifs_flush(struct file *file, fl_owner_t id)
struct inode *inode = file->f_path.dentry->d_inode;
int rc = 0;
- /* Rather than do the steps manually:
- lock the inode for writing
- loop through pages looking for write behind data (dirty pages)
- coalesce into contiguous 16K (or smaller) chunks to write to server
- send to server (prefer in parallel)
- deal with writebehind errors
- unlock inode for writing
- filemapfdatawrite appears easier for the time being */
-
- rc = filemap_fdatawrite(inode->i_mapping);
- /* reset wb rc if we were able to write out dirty pages */
- if (!rc) {
- rc = CIFS_I(inode)->write_behind_rc;
- CIFS_I(inode)->write_behind_rc = 0;
- }
+ if (file->f_mode & FMODE_WRITE)
+ rc = filemap_write_and_wait(inode->i_mapping);
cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
@@ -2270,7 +2168,7 @@ void cifs_oplock_break(struct work_struct *work)
oplock_break);
struct inode *inode = cfile->dentry->d_inode;
struct cifsInodeInfo *cinode = CIFS_I(inode);
- int rc, waitrc = 0;
+ int rc = 0;
if (inode && S_ISREG(inode->i_mode)) {
if (cinode->clientCanCacheRead)
@@ -2279,13 +2177,10 @@ void cifs_oplock_break(struct work_struct *work)
break_lease(inode, O_WRONLY);
rc = filemap_fdatawrite(inode->i_mapping);
if (cinode->clientCanCacheRead == 0) {
- waitrc = filemap_fdatawait(inode->i_mapping);
+ rc = filemap_fdatawait(inode->i_mapping);
+ mapping_set_error(inode->i_mapping, rc);
invalidate_remote_inode(inode);
}
- if (!rc)
- rc = waitrc;
- if (rc)
- cinode->write_behind_rc = rc;
cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
}
@@ -2304,7 +2199,7 @@ void cifs_oplock_break(struct work_struct *work)
/*
* We might have kicked in before is_valid_oplock_break()
* finished grabbing reference for us. Make sure it's done by
- * waiting for GlobalSMSSeslock.
+ * waiting for cifs_file_list_lock.
*/
spin_lock(&cifs_file_list_lock);
spin_unlock(&cifs_file_list_lock);
@@ -2312,6 +2207,7 @@ void cifs_oplock_break(struct work_struct *work)
cifs_oplock_break_put(cfile);
}
+/* must be called while holding cifs_file_list_lock */
void cifs_oplock_break_get(struct cifsFileInfo *cfile)
{
cifs_sb_active(cfile->dentry->d_sb);
@@ -2320,8 +2216,10 @@ void cifs_oplock_break_get(struct cifsFileInfo *cfile)
void cifs_oplock_break_put(struct cifsFileInfo *cfile)
{
+ struct super_block *sb = cfile->dentry->d_sb;
+
cifsFileInfo_put(cfile);
- cifs_sb_deactive(cfile->dentry->d_sb);
+ cifs_sb_deactive(sb);
}
const struct address_space_operations cifs_addr_ops = {
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index a2ad94e..297a43d 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -2,7 +2,7 @@
* fs/cifs/fscache.c - CIFS filesystem cache interface
*
* Copyright (c) 2010 Novell, Inc.
- * Author(s): Suresh Jayaraman (sjayaraman@suse.de>
+ * Author(s): Suresh Jayaraman <sjayaraman@suse.de>
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
@@ -67,10 +67,12 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode)
if (cifsi->fscache)
return;
- cifsi->fscache = fscache_acquire_cookie(tcon->fscache,
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) {
+ cifsi->fscache = fscache_acquire_cookie(tcon->fscache,
&cifs_fscache_inode_object_def, cifsi);
- cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)", tcon->fscache,
+ cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)", tcon->fscache,
cifsi->fscache);
+ }
}
void cifs_fscache_release_inode_cookie(struct inode *inode)
@@ -101,10 +103,8 @@ void cifs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)
{
if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
cifs_fscache_disable_inode_cookie(inode);
- else {
+ else
cifs_fscache_enable_inode_cookie(inode);
- cFYI(1, "CIFS: fscache inode cookie set");
- }
}
void cifs_fscache_reset_inode_cookie(struct inode *inode)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 9497930..0c7e369 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -518,6 +518,7 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
fattr->cf_eof = le64_to_cpu(info->EndOfFile);
fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
+ fattr->cf_createtime = le64_to_cpu(info->CreationTime);
if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
@@ -686,13 +687,18 @@ int cifs_get_inode_info(struct inode **pinode,
cFYI(1, "cifs_sfu_type failed: %d", tmprc);
}
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CONFIG_CIFS_ACL
/* fill in 0777 bits from ACL */
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
- cFYI(1, "Getting mode bits from ACL");
- cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path, pfid);
+ rc = cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path,
+ pfid);
+ if (rc) {
+ cFYI(1, "%s: Getting ACL failed with error: %d",
+ __func__, rc);
+ goto cgii_exit;
+ }
}
-#endif
+#endif /* CONFIG_CIFS_ACL */
/* fill in remaining high mode bits e.g. SUID, VTX */
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
@@ -723,12 +729,12 @@ static const struct inode_operations cifs_ipc_inode_ops = {
.lookup = cifs_lookup,
};
-char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb)
+char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
+ struct cifsTconInfo *tcon)
{
int pplen = cifs_sb->prepathlen;
int dfsplen;
char *full_path = NULL;
- struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
/* if no prefix path, simply set path to the root of share to "" */
if (pplen == 0) {
@@ -774,6 +780,10 @@ cifs_find_inode(struct inode *inode, void *opaque)
if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
return 0;
+ /* use createtime like an i_generation field */
+ if (CIFS_I(inode)->createtime != fattr->cf_createtime)
+ return 0;
+
/* don't match inode of different type */
if ((inode->i_mode & S_IFMT) != (fattr->cf_mode & S_IFMT))
return 0;
@@ -791,6 +801,7 @@ cifs_init_inode(struct inode *inode, void *opaque)
struct cifs_fattr *fattr = (struct cifs_fattr *) opaque;
CIFS_I(inode)->uniqueid = fattr->cf_uniqueid;
+ CIFS_I(inode)->createtime = fattr->cf_createtime;
return 0;
}
@@ -804,14 +815,14 @@ inode_has_hashed_dentries(struct inode *inode)
{
struct dentry *dentry;
- spin_lock(&dcache_lock);
+ spin_lock(&inode->i_lock);
list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
return true;
}
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
return false;
}
@@ -870,7 +881,7 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
char *full_path;
struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
- full_path = cifs_build_path_to_root(cifs_sb);
+ full_path = cifs_build_path_to_root(cifs_sb, tcon);
if (full_path == NULL)
return ERR_PTR(-ENOMEM);
@@ -881,8 +892,10 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
rc = cifs_get_inode_info(&inode, full_path, NULL, sb,
xid, NULL);
- if (!inode)
- return ERR_PTR(rc);
+ if (!inode) {
+ inode = ERR_PTR(rc);
+ goto out;
+ }
#ifdef CONFIG_CIFS_FSCACHE
/* populate tcon->resource_id */
@@ -898,13 +911,11 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
inode->i_uid = cifs_sb->mnt_uid;
inode->i_gid = cifs_sb->mnt_gid;
} else if (rc) {
- kfree(full_path);
- _FreeXid(xid);
iget_failed(inode);
- return ERR_PTR(rc);
+ inode = ERR_PTR(rc);
}
-
+out:
kfree(full_path);
/* can not call macro FreeXid here since in a void func
* TODO: This is no longer true
@@ -1314,9 +1325,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
to set uid/gid */
inc_nlink(inode);
if (pTcon->nocase)
- direntry->d_op = &cifs_ci_dentry_ops;
+ d_set_d_op(direntry, &cifs_ci_dentry_ops);
else
- direntry->d_op = &cifs_dentry_ops;
+ d_set_d_op(direntry, &cifs_dentry_ops);
cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
cifs_fill_uniqueid(inode->i_sb, &fattr);
@@ -1358,9 +1369,9 @@ mkdir_get_info:
inode->i_sb, xid, NULL);
if (pTcon->nocase)
- direntry->d_op = &cifs_ci_dentry_ops;
+ d_set_d_op(direntry, &cifs_ci_dentry_ops);
else
- direntry->d_op = &cifs_dentry_ops;
+ d_set_d_op(direntry, &cifs_dentry_ops);
d_instantiate(direntry, newinode);
/* setting nlink not necessary except in cases where we
* failed to get it from the server or was set bogus */
@@ -1648,6 +1659,7 @@ static bool
cifs_inode_needs_reval(struct inode *inode)
{
struct cifsInodeInfo *cifs_i = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
if (cifs_i->clientCanCacheRead)
return false;
@@ -1658,19 +1670,21 @@ cifs_inode_needs_reval(struct inode *inode)
if (cifs_i->time == 0)
return true;
- /* FIXME: the actimeo should be tunable */
- if (time_after_eq(jiffies, cifs_i->time + HZ))
+ if (!time_in_range(jiffies, cifs_i->time,
+ cifs_i->time + cifs_sb->actimeo))
return true;
/* hardlinked files w/ noserverino get "special" treatment */
- if (!(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
S_ISREG(inode->i_mode) && inode->i_nlink != 1)
return true;
return false;
}
-/* check invalid_mapping flag and zap the cache if it's set */
+/*
+ * Zap the cache. Called when invalid_mapping flag is set.
+ */
static void
cifs_invalidate_mapping(struct inode *inode)
{
@@ -1682,8 +1696,7 @@ cifs_invalidate_mapping(struct inode *inode)
/* write back any cached data */
if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
rc = filemap_write_and_wait(inode->i_mapping);
- if (rc)
- cifs_i->write_behind_rc = rc;
+ mapping_set_error(inode->i_mapping, rc);
}
invalidate_remote_inode(inode);
cifs_fscache_reset_inode_cookie(inode);
@@ -1943,10 +1956,8 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
* the flush returns error?
*/
rc = filemap_write_and_wait(inode->i_mapping);
- if (rc != 0) {
- cifsInode->write_behind_rc = rc;
- rc = 0;
- }
+ mapping_set_error(inode->i_mapping, rc);
+ rc = 0;
if (attrs->ia_valid & ATTR_SIZE) {
rc = cifs_set_file_size(inode, attrs, xid, full_path);
@@ -2087,10 +2098,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
* the flush returns error?
*/
rc = filemap_write_and_wait(inode->i_mapping);
- if (rc != 0) {
- cifsInode->write_behind_rc = rc;
- rc = 0;
- }
+ mapping_set_error(inode->i_mapping, rc);
+ rc = 0;
if (attrs->ia_valid & ATTR_SIZE) {
rc = cifs_set_file_size(inode, attrs, xid, full_path);
@@ -2119,11 +2128,16 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
if (attrs->ia_valid & ATTR_MODE) {
rc = 0;
-#ifdef CONFIG_CIFS_EXPERIMENTAL
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
- rc = mode_to_acl(inode, full_path, mode);
- else
-#endif
+#ifdef CONFIG_CIFS_ACL
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
+ rc = mode_to_cifs_acl(inode, full_path, mode);
+ if (rc) {
+ cFYI(1, "%s: Setting ACL failed with error: %d",
+ __func__, rc);
+ goto cifs_setattr_exit;
+ }
+ } else
+#endif /* CONFIG_CIFS_ACL */
if (((mode & S_IWUGO) == 0) &&
(cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
@@ -2182,7 +2196,6 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
setattr_copy(inode, attrs);
mark_inode_dirty(inode);
- return 0;
cifs_setattr_exit:
kfree(full_path);
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 077bf75..0c98672 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -38,10 +38,10 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
struct cifs_sb_info *cifs_sb;
#ifdef CONFIG_CIFS_POSIX
struct cifsFileInfo *pSMBFile = filep->private_data;
- struct cifsTconInfo *tcon = tlink_tcon(pSMBFile->tlink);
+ struct cifsTconInfo *tcon;
__u64 ExtAttrBits = 0;
__u64 ExtAttrMask = 0;
- __u64 caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
+ __u64 caps;
#endif /* CONFIG_CIFS_POSIX */
xid = GetXid();
@@ -62,9 +62,11 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
break;
#ifdef CONFIG_CIFS_POSIX
case FS_IOC_GETFLAGS:
+ if (pSMBFile == NULL)
+ break;
+ tcon = tlink_tcon(pSMBFile->tlink);
+ caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
if (CIFS_UNIX_EXTATTR_CAP & caps) {
- if (pSMBFile == NULL)
- break;
rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid,
&ExtAttrBits, &ExtAttrMask);
if (rc == 0)
@@ -75,13 +77,15 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
break;
case FS_IOC_SETFLAGS:
+ if (pSMBFile == NULL)
+ break;
+ tcon = tlink_tcon(pSMBFile->tlink);
+ caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
if (CIFS_UNIX_EXTATTR_CAP & caps) {
if (get_user(ExtAttrBits, (int __user *)arg)) {
rc = -EFAULT;
break;
}
- if (pSMBFile == NULL)
- break;
/* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid,
extAttrBits, &ExtAttrMask);*/
}
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 85cdbf8..fe2f6a9 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -525,9 +525,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
rc);
} else {
if (pTcon->nocase)
- direntry->d_op = &cifs_ci_dentry_ops;
+ d_set_d_op(direntry, &cifs_ci_dentry_ops);
else
- direntry->d_op = &cifs_dentry_ops;
+ d_set_d_op(direntry, &cifs_dentry_ops);
d_instantiate(direntry, newinode);
}
}
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 1c681f6..43f1028 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -569,15 +569,14 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
cFYI(1, "file id match, oplock break");
pCifsInode = CIFS_I(netfile->dentry->d_inode);
- pCifsInode->clientCanCacheAll = false;
- if (pSMB->OplockLevel == 0)
- pCifsInode->clientCanCacheRead = false;
+ cifs_set_oplock_level(pCifsInode,
+ pSMB->OplockLevel);
/*
* cifs_oplock_break_put() can't be called
* from here. Get reference after queueing
* succeeded. cifs_oplock_break() will
- * synchronize using GlobalSMSSeslock.
+ * synchronize using cifs_file_list_lock.
*/
if (queue_work(system_nrt_wq,
&netfile->oplock_break))
@@ -722,3 +721,23 @@ cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb)
cifs_sb_master_tcon(cifs_sb)->treeName);
}
}
+
+void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
+{
+ oplock &= 0xF;
+
+ if (oplock == OPLOCK_EXCLUSIVE) {
+ cinode->clientCanCacheAll = true;
+ cinode->clientCanCacheRead = true;
+ cFYI(1, "Exclusive Oplock granted on inode %p",
+ &cinode->vfs_inode);
+ } else if (oplock == OPLOCK_READ) {
+ cinode->clientCanCacheAll = false;
+ cinode->clientCanCacheRead = true;
+ cFYI(1, "Level II Oplock granted on inode %p",
+ &cinode->vfs_inode);
+ } else {
+ cinode->clientCanCacheAll = false;
+ cinode->clientCanCacheRead = false;
+ }
+}
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index ef7bb7b..76b1b37 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -79,7 +79,7 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
cFYI(1, "For %s", name->name);
if (parent->d_op && parent->d_op->d_hash)
- parent->d_op->d_hash(parent, name);
+ parent->d_op->d_hash(parent, parent->d_inode, name);
else
name->hash = full_name_hash(name->name, name->len);
@@ -103,9 +103,9 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
}
if (cifs_sb_master_tcon(CIFS_SB(sb))->nocase)
- dentry->d_op = &cifs_ci_dentry_ops;
+ d_set_d_op(dentry, &cifs_ci_dentry_ops);
else
- dentry->d_op = &cifs_dentry_ops;
+ d_set_d_op(dentry, &cifs_dentry_ops);
alias = d_materialise_unique(dentry, inode);
if (alias != NULL) {
@@ -160,6 +160,7 @@ cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info,
fattr->cf_cifsattrs = le32_to_cpu(info->ExtFileAttributes);
fattr->cf_eof = le64_to_cpu(info->EndOfFile);
fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
+ fattr->cf_createtime = le64_to_cpu(info->CreationTime);
fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime);
fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime);
@@ -226,26 +227,29 @@ static int initiate_cifs_search(const int xid, struct file *file)
char *full_path = NULL;
struct cifsFileInfo *cifsFile;
struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
- struct tcon_link *tlink;
+ struct tcon_link *tlink = NULL;
struct cifsTconInfo *pTcon;
- tlink = cifs_sb_tlink(cifs_sb);
- if (IS_ERR(tlink))
- return PTR_ERR(tlink);
- pTcon = tlink_tcon(tlink);
-
- if (file->private_data == NULL)
- file->private_data =
- kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
if (file->private_data == NULL) {
- rc = -ENOMEM;
- goto error_exit;
+ tlink = cifs_sb_tlink(cifs_sb);
+ if (IS_ERR(tlink))
+ return PTR_ERR(tlink);
+
+ cifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
+ if (cifsFile == NULL) {
+ rc = -ENOMEM;
+ goto error_exit;
+ }
+ file->private_data = cifsFile;
+ cifsFile->tlink = cifs_get_tlink(tlink);
+ pTcon = tlink_tcon(tlink);
+ } else {
+ cifsFile = file->private_data;
+ pTcon = tlink_tcon(cifsFile->tlink);
}
- cifsFile = file->private_data;
cifsFile->invalidHandle = true;
cifsFile->srch_inf.endOfSearch = false;
- cifsFile->tlink = cifs_get_tlink(tlink);
full_path = build_path_from_dentry(file->f_path.dentry);
if (full_path == NULL) {
@@ -756,18 +760,6 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
rc = filldir(direntry, qstring.name, qstring.len, file->f_pos,
ino, fattr.cf_dtype);
- /*
- * we can not return filldir errors to the caller since they are
- * "normal" when the stat blocksize is too small - we return remapped
- * error instead
- *
- * FIXME: This looks bogus. filldir returns -EOVERFLOW in the above
- * case already. Why should we be clobbering other errors from it?
- */
- if (rc) {
- cFYI(1, "filldir rc = %d", rc);
- rc = -EOVERFLOW;
- }
dput(tmp_dentry);
return rc;
}
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 2a11efd..eb74648 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -32,9 +32,6 @@
#include <linux/slab.h>
#include "cifs_spnego.h"
-extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
- unsigned char *p24);
-
/*
* Checks if this is the first smb session to be reconnected after
* the socket has been reestablished (so we know whether to use vc 0).
@@ -402,29 +399,27 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
return -EINVAL;
}
- memcpy(ses->cryptKey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE);
+ memcpy(ses->ntlmssp->cryptkey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE);
/* BB we could decode pblob->NegotiateFlags; some may be useful */
/* In particular we can examine sign flags */
/* BB spec says that if AvId field of MsvAvTimestamp is populated then
we must set the MIC field of the AUTHENTICATE_MESSAGE */
-
+ ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags);
tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset);
tilen = cpu_to_le16(pblob->TargetInfoArray.Length);
- ses->tilen = tilen;
- if (ses->tilen) {
- ses->tiblob = kmalloc(tilen, GFP_KERNEL);
- if (!ses->tiblob) {
+ if (tilen) {
+ ses->auth_key.response = kmalloc(tilen, GFP_KERNEL);
+ if (!ses->auth_key.response) {
cERROR(1, "Challenge target info allocation failure");
- ses->tilen = 0;
return -ENOMEM;
}
- memcpy(ses->tiblob, bcc_ptr + tioffset, ses->tilen);
+ memcpy(ses->auth_key.response, bcc_ptr + tioffset, tilen);
+ ses->auth_key.len = tilen;
}
return 0;
}
-#ifdef CONFIG_CIFS_EXPERIMENTAL
/* BB Move to ntlmssp.c eventually */
/* We do not malloc the blob, it is passed in pbuffer, because
@@ -435,20 +430,23 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
NEGOTIATE_MESSAGE *sec_blob = (NEGOTIATE_MESSAGE *)pbuffer;
__u32 flags;
+ memset(pbuffer, 0, sizeof(NEGOTIATE_MESSAGE));
memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
sec_blob->MessageType = NtLmNegotiate;
/* BB is NTLMV2 session security format easier to use here? */
flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET |
NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
- NTLMSSP_NEGOTIATE_NTLM;
+ NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
if (ses->server->secMode &
- (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+ (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
flags |= NTLMSSP_NEGOTIATE_SIGN;
- if (ses->server->secMode & SECMODE_SIGN_REQUIRED)
- flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
+ if (!ses->server->session_estab)
+ flags |= NTLMSSP_NEGOTIATE_KEY_XCH |
+ NTLMSSP_NEGOTIATE_EXTENDED_SEC;
+ }
- sec_blob->NegotiateFlags |= cpu_to_le32(flags);
+ sec_blob->NegotiateFlags = cpu_to_le32(flags);
sec_blob->WorkstationName.BufferOffset = 0;
sec_blob->WorkstationName.Length = 0;
@@ -469,11 +467,9 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
const struct nls_table *nls_cp)
{
int rc;
- unsigned int size;
AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
__u32 flags;
unsigned char *tmp;
- struct ntlmv2_resp ntlmv2_response = {};
memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
sec_blob->MessageType = NtLmAuthenticate;
@@ -481,7 +477,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
flags = NTLMSSP_NEGOTIATE_56 |
NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO |
NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
- NTLMSSP_NEGOTIATE_NTLM;
+ NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
if (ses->server->secMode &
(SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
flags |= NTLMSSP_NEGOTIATE_SIGN;
@@ -489,7 +485,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE);
- sec_blob->NegotiateFlags |= cpu_to_le32(flags);
+ sec_blob->NegotiateFlags = cpu_to_le32(flags);
sec_blob->LmChallengeResponse.BufferOffset =
cpu_to_le32(sizeof(AUTHENTICATE_MESSAGE));
@@ -497,25 +493,19 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
sec_blob->LmChallengeResponse.MaximumLength = 0;
sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
- rc = setup_ntlmv2_rsp(ses, (char *)&ntlmv2_response, nls_cp);
+ rc = setup_ntlmv2_rsp(ses, nls_cp);
if (rc) {
cERROR(1, "Error %d during NTLMSSP authentication", rc);
goto setup_ntlmv2_ret;
}
- size = sizeof(struct ntlmv2_resp);
- memcpy(tmp, (char *)&ntlmv2_response, size);
- tmp += size;
- if (ses->tilen > 0) {
- memcpy(tmp, ses->tiblob, ses->tilen);
- tmp += ses->tilen;
- }
+ memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+ tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
- sec_blob->NtChallengeResponse.Length = cpu_to_le16(size + ses->tilen);
+ sec_blob->NtChallengeResponse.Length =
+ cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
sec_blob->NtChallengeResponse.MaximumLength =
- cpu_to_le16(size + ses->tilen);
- kfree(ses->tiblob);
- ses->tiblob = NULL;
- ses->tilen = 0;
+ cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
if (ses->domainName == NULL) {
sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
@@ -554,26 +544,26 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
sec_blob->WorkstationName.MaximumLength = 0;
tmp += 2;
- sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
- sec_blob->SessionKey.Length = 0;
- sec_blob->SessionKey.MaximumLength = 0;
+ if (((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) ||
+ (ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
+ && !calc_seckey(ses)) {
+ memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
+ sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
+ sec_blob->SessionKey.MaximumLength =
+ cpu_to_le16(CIFS_CPHTXT_SIZE);
+ tmp += CIFS_CPHTXT_SIZE;
+ } else {
+ sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->SessionKey.Length = 0;
+ sec_blob->SessionKey.MaximumLength = 0;
+ }
setup_ntlmv2_ret:
*buflen = tmp - pbuffer;
return rc;
}
-
-static void setup_ntlmssp_neg_req(SESSION_SETUP_ANDX *pSMB,
- struct cifsSesInfo *ses)
-{
- build_ntlmssp_negotiate_blob(&pSMB->req.SecurityBlob[0], ses);
- pSMB->req.SecurityBlobLength = cpu_to_le16(sizeof(NEGOTIATE_MESSAGE));
-
- return;
-}
-#endif
-
int
CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
const struct nls_table *nls_cp)
@@ -600,8 +590,16 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
return -EINVAL;
type = ses->server->secType;
-
cFYI(1, "sess setup type %d", type);
+ if (type == RawNTLMSSP) {
+ /* if memory allocation is successful, caller of this function
+ * frees it.
+ */
+ ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
+ if (!ses->ntlmssp)
+ return -ENOMEM;
+ }
+
ssetup_ntlmssp_authenticate:
if (phase == NtLmChallenge)
phase = NtLmAuthenticate; /* if ntlmssp, now final phase */
@@ -666,10 +664,14 @@ ssetup_ntlmssp_authenticate:
/* no capabilities flags in old lanman negotiation */
pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE);
- /* BB calculate hash with password */
- /* and copy into bcc */
- calc_lanman_hash(ses->password, ses->cryptKey,
+ /* Calculate hash with password and copy into bcc_ptr.
+ * Encryption Key (stored as in cryptkey) gets used if the
+ * security mode bit in Negottiate Protocol response states
+ * to use challenge/response method (i.e. Password bit is 1).
+ */
+
+ calc_lanman_hash(ses->password, ses->server->cryptkey,
ses->server->secMode & SECMODE_PW_ENCRYPT ?
true : false, lnm_session_key);
@@ -687,24 +689,27 @@ ssetup_ntlmssp_authenticate:
ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
#endif
} else if (type == NTLM) {
- char ntlm_session_key[CIFS_SESS_KEY_SIZE];
-
pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
pSMB->req_no_secext.CaseInsensitivePasswordLength =
- cpu_to_le16(CIFS_SESS_KEY_SIZE);
+ cpu_to_le16(CIFS_AUTH_RESP_SIZE);
pSMB->req_no_secext.CaseSensitivePasswordLength =
- cpu_to_le16(CIFS_SESS_KEY_SIZE);
+ cpu_to_le16(CIFS_AUTH_RESP_SIZE);
- /* calculate session key */
- SMBNTencrypt(ses->password, ses->cryptKey, ntlm_session_key);
+ /* calculate ntlm response and session key */
+ rc = setup_ntlm_response(ses);
+ if (rc) {
+ cERROR(1, "Error %d during NTLM authentication", rc);
+ goto ssetup_exit;
+ }
+
+ /* copy ntlm response */
+ memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ CIFS_AUTH_RESP_SIZE);
+ bcc_ptr += CIFS_AUTH_RESP_SIZE;
+ memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ CIFS_AUTH_RESP_SIZE);
+ bcc_ptr += CIFS_AUTH_RESP_SIZE;
- cifs_calculate_session_key(&ses->auth_key,
- ntlm_session_key, ses->password);
- /* copy session key */
- memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE);
- bcc_ptr += CIFS_SESS_KEY_SIZE;
- memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE);
- bcc_ptr += CIFS_SESS_KEY_SIZE;
if (ses->capabilities & CAP_UNICODE) {
/* unicode strings must be word aligned */
if (iov[0].iov_len % 2) {
@@ -715,47 +720,26 @@ ssetup_ntlmssp_authenticate:
} else
ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
} else if (type == NTLMv2) {
- char *v2_sess_key =
- kmalloc(sizeof(struct ntlmv2_resp), GFP_KERNEL);
-
- /* BB FIXME change all users of v2_sess_key to
- struct ntlmv2_resp */
-
- if (v2_sess_key == NULL) {
- rc = -ENOMEM;
- goto ssetup_exit;
- }
-
pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
/* LM2 password would be here if we supported it */
pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
- /* cpu_to_le16(LM2_SESS_KEY_SIZE); */
- /* calculate session key */
- rc = setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
+ /* calculate nlmv2 response and session key */
+ rc = setup_ntlmv2_rsp(ses, nls_cp);
if (rc) {
cERROR(1, "Error %d during NTLMv2 authentication", rc);
- kfree(v2_sess_key);
goto ssetup_exit;
}
- memcpy(bcc_ptr, (char *)v2_sess_key,
- sizeof(struct ntlmv2_resp));
- bcc_ptr += sizeof(struct ntlmv2_resp);
- kfree(v2_sess_key);
+ memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+ bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
+
/* set case sensitive password length after tilen may get
* assigned, tilen is 0 otherwise.
*/
pSMB->req_no_secext.CaseSensitivePasswordLength =
- cpu_to_le16(sizeof(struct ntlmv2_resp) + ses->tilen);
- if (ses->tilen > 0) {
- memcpy(bcc_ptr, ses->tiblob, ses->tilen);
- bcc_ptr += ses->tilen;
- /* we never did allocate ses->domainName to free */
- kfree(ses->tiblob);
- ses->tiblob = NULL;
- ses->tilen = 0;
- }
+ cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
if (ses->capabilities & CAP_UNICODE) {
if (iov[0].iov_len % 2) {
@@ -768,6 +752,7 @@ ssetup_ntlmssp_authenticate:
} else if (type == Kerberos) {
#ifdef CONFIG_CIFS_UPCALL
struct cifs_spnego_msg *msg;
+
spnego_key = cifs_get_spnego_key(ses);
if (IS_ERR(spnego_key)) {
rc = PTR_ERR(spnego_key);
@@ -785,16 +770,17 @@ ssetup_ntlmssp_authenticate:
rc = -EKEYREJECTED;
goto ssetup_exit;
}
- /* bail out if key is too long */
- if (msg->sesskey_len >
- sizeof(ses->auth_key.data.krb5)) {
- cERROR(1, "Kerberos signing key too long (%u bytes)",
- msg->sesskey_len);
- rc = -EOVERFLOW;
+
+ ses->auth_key.response = kmalloc(msg->sesskey_len, GFP_KERNEL);
+ if (!ses->auth_key.response) {
+ cERROR(1, "Kerberos can't allocate (%u bytes) memory",
+ msg->sesskey_len);
+ rc = -ENOMEM;
goto ssetup_exit;
}
+ memcpy(ses->auth_key.response, msg->data, msg->sesskey_len);
ses->auth_key.len = msg->sesskey_len;
- memcpy(ses->auth_key.data.krb5, msg->data, msg->sesskey_len);
+
pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
capabilities |= CAP_EXTENDED_SECURITY;
pSMB->req.Capabilities = cpu_to_le32(capabilities);
@@ -818,71 +804,70 @@ ssetup_ntlmssp_authenticate:
rc = -ENOSYS;
goto ssetup_exit;
#endif /* CONFIG_CIFS_UPCALL */
- } else {
-#ifdef CONFIG_CIFS_EXPERIMENTAL
- if (type == RawNTLMSSP) {
- if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
- cERROR(1, "NTLMSSP requires Unicode support");
- rc = -ENOSYS;
+ } else if (type == RawNTLMSSP) {
+ if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
+ cERROR(1, "NTLMSSP requires Unicode support");
+ rc = -ENOSYS;
+ goto ssetup_exit;
+ }
+
+ cFYI(1, "ntlmssp session setup phase %d", phase);
+ pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
+ capabilities |= CAP_EXTENDED_SECURITY;
+ pSMB->req.Capabilities |= cpu_to_le32(capabilities);
+ switch(phase) {
+ case NtLmNegotiate:
+ build_ntlmssp_negotiate_blob(
+ pSMB->req.SecurityBlob, ses);
+ iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
+ iov[1].iov_base = pSMB->req.SecurityBlob;
+ pSMB->req.SecurityBlobLength =
+ cpu_to_le16(sizeof(NEGOTIATE_MESSAGE));
+ break;
+ case NtLmAuthenticate:
+ /*
+ * 5 is an empirical value, large enough to hold
+ * authenticate message plus max 10 of av paris,
+ * domain, user, workstation names, flags, etc.
+ */
+ ntlmsspblob = kzalloc(
+ 5*sizeof(struct _AUTHENTICATE_MESSAGE),
+ GFP_KERNEL);
+ if (!ntlmsspblob) {
+ cERROR(1, "Can't allocate NTLMSSP blob");
+ rc = -ENOMEM;
goto ssetup_exit;
}
- cFYI(1, "ntlmssp session setup phase %d", phase);
- pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
- capabilities |= CAP_EXTENDED_SECURITY;
- pSMB->req.Capabilities |= cpu_to_le32(capabilities);
- if (phase == NtLmNegotiate) {
- setup_ntlmssp_neg_req(pSMB, ses);
- iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
- iov[1].iov_base = &pSMB->req.SecurityBlob[0];
- } else if (phase == NtLmAuthenticate) {
- /* 5 is an empirical value, large enought to
- * hold authenticate message, max 10 of
- * av paris, doamin,user,workstation mames,
- * flags etc..
- */
- ntlmsspblob = kmalloc(
- 5*sizeof(struct _AUTHENTICATE_MESSAGE),
- GFP_KERNEL);
- if (!ntlmsspblob) {
- cERROR(1, "Can't allocate NTLMSSP");
- rc = -ENOMEM;
- goto ssetup_exit;
- }
-
- rc = build_ntlmssp_auth_blob(ntlmsspblob,
- &blob_len, ses, nls_cp);
- if (rc)
- goto ssetup_exit;
- iov[1].iov_len = blob_len;
- iov[1].iov_base = ntlmsspblob;
- pSMB->req.SecurityBlobLength =
- cpu_to_le16(blob_len);
- /* Make sure that we tell the server that we
- are using the uid that it just gave us back
- on the response (challenge) */
- smb_buf->Uid = ses->Suid;
- } else {
- cERROR(1, "invalid phase %d", phase);
- rc = -ENOSYS;
+ rc = build_ntlmssp_auth_blob(ntlmsspblob,
+ &blob_len, ses, nls_cp);
+ if (rc)
goto ssetup_exit;
- }
- /* unicode strings must be word aligned */
- if ((iov[0].iov_len + iov[1].iov_len) % 2) {
- *bcc_ptr = 0;
- bcc_ptr++;
- }
- unicode_oslm_strings(&bcc_ptr, nls_cp);
- } else {
- cERROR(1, "secType %d not supported!", type);
+ iov[1].iov_len = blob_len;
+ iov[1].iov_base = ntlmsspblob;
+ pSMB->req.SecurityBlobLength = cpu_to_le16(blob_len);
+ /*
+ * Make sure that we tell the server that we are using
+ * the uid that it just gave us back on the response
+ * (challenge)
+ */
+ smb_buf->Uid = ses->Suid;
+ break;
+ default:
+ cERROR(1, "invalid phase %d", phase);
rc = -ENOSYS;
goto ssetup_exit;
}
-#else
+ /* unicode strings must be word aligned */
+ if ((iov[0].iov_len + iov[1].iov_len) % 2) {
+ *bcc_ptr = 0;
+ bcc_ptr++;
+ }
+ unicode_oslm_strings(&bcc_ptr, nls_cp);
+ } else {
cERROR(1, "secType %d not supported!", type);
rc = -ENOSYS;
goto ssetup_exit;
-#endif
}
iov[2].iov_base = str_area;
@@ -897,8 +882,6 @@ ssetup_ntlmssp_authenticate:
CIFS_STD_OP /* not long */ | CIFS_LOG_ERROR);
/* SMB request buf freed in SendReceive2 */
- cFYI(1, "ssetup rc from sendrecv2 is %d", rc);
-
pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base;
smb_buf = (struct smb_hdr *)iov[0].iov_base;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index a66c91e..59ca81b 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -119,7 +119,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
if (ssocket == NULL)
return -ENOTSOCK; /* BB eventually add reconnect code here */
- smb_msg.msg_name = (struct sockaddr *) &server->addr.sockAddr;
+ smb_msg.msg_name = (struct sockaddr *) &server->dstaddr;
smb_msg.msg_namelen = sizeof(struct sockaddr);
smb_msg.msg_control = NULL;
smb_msg.msg_controllen = 0;
@@ -543,7 +543,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
(ses->server->secMode & (SECMODE_SIGN_REQUIRED |
SECMODE_SIGN_ENABLED))) {
rc = cifs_verify_signature(midQ->resp_buf,
- &ses->server->session_key,
+ ses->server,
midQ->sequence_number+1);
if (rc) {
cERROR(1, "Unexpected SMB signature");
@@ -731,7 +731,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
(ses->server->secMode & (SECMODE_SIGN_REQUIRED |
SECMODE_SIGN_ENABLED))) {
rc = cifs_verify_signature(out_buf,
- &ses->server->session_key,
+ ses->server,
midQ->sequence_number+1);
if (rc) {
cERROR(1, "Unexpected SMB signature");
@@ -981,7 +981,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
(ses->server->secMode & (SECMODE_SIGN_REQUIRED |
SECMODE_SIGN_ENABLED))) {
rc = cifs_verify_signature(out_buf,
- &ses->server->session_key,
+ ses->server,
midQ->sequence_number+1);
if (rc) {
cERROR(1, "Unexpected SMB signature");
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index a264b74..eae2a14 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -30,10 +30,11 @@
#define MAX_EA_VALUE_SIZE 65535
#define CIFS_XATTR_DOS_ATTRIB "user.DosAttrib"
+#define CIFS_XATTR_CIFS_ACL "system.cifs_acl"
#define CIFS_XATTR_USER_PREFIX "user."
#define CIFS_XATTR_SYSTEM_PREFIX "system."
#define CIFS_XATTR_OS2_PREFIX "os2."
-#define CIFS_XATTR_SECURITY_PREFIX ".security"
+#define CIFS_XATTR_SECURITY_PREFIX "security."
#define CIFS_XATTR_TRUSTED_PREFIX "trusted."
#define XATTR_TRUSTED_PREFIX_LEN 8
#define XATTR_SECURITY_PREFIX_LEN 9
@@ -277,29 +278,8 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
-#ifdef CONFIG_CIFS_EXPERIMENTAL
- else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
- __u16 fid;
- int oplock = 0;
- struct cifs_ntsd *pacl = NULL;
- __u32 buflen = 0;
- if (experimEnabled)
- rc = CIFSSMBOpen(xid, pTcon, full_path,
- FILE_OPEN, GENERIC_READ, 0, &fid,
- &oplock, NULL, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- /* else rc is EOPNOTSUPP from above */
-
- if (rc == 0) {
- rc = CIFSSMBGetCIFSACL(xid, pTcon, fid, &pacl,
- &buflen);
- CIFSSMBClose(xid, pTcon, fid);
- }
- }
-#endif /* EXPERIMENTAL */
#else
- cFYI(1, "query POSIX ACL not supported yet");
+ cFYI(1, "Query POSIX ACL not supported yet");
#endif /* CONFIG_CIFS_POSIX */
} else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT,
strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) {
@@ -311,8 +291,33 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
#else
- cFYI(1, "query POSIX default ACL not supported yet");
-#endif
+ cFYI(1, "Query POSIX default ACL not supported yet");
+#endif /* CONFIG_CIFS_POSIX */
+ } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
+ strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
+#ifdef CONFIG_CIFS_ACL
+ u32 acllen;
+ struct cifs_ntsd *pacl;
+
+ pacl = get_cifs_acl(cifs_sb, direntry->d_inode,
+ full_path, &acllen);
+ if (IS_ERR(pacl)) {
+ rc = PTR_ERR(pacl);
+ cERROR(1, "%s: error %zd getting sec desc",
+ __func__, rc);
+ } else {
+ if (ea_value) {
+ if (acllen > buf_size)
+ acllen = -ERANGE;
+ else
+ memcpy(ea_value, pacl, acllen);
+ }
+ rc = acllen;
+ kfree(pacl);
+ }
+#else
+ cFYI(1, "Query CIFS ACL not supported yet");
+#endif /* CONFIG_CIFS_ACL */
} else if (strncmp(ea_name,
CIFS_XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) {
cFYI(1, "Trusted xattr namespace not supported yet");
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 9060f08..5525e1c 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -93,7 +93,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
struct list_head *child;
struct dentry *de;
- spin_lock(&dcache_lock);
+ spin_lock(&parent->d_lock);
list_for_each(child, &parent->d_subdirs)
{
de = list_entry(child, struct dentry, d_u.d_child);
@@ -102,7 +102,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
continue;
coda_flag_inode(de->d_inode, flag);
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&parent->d_lock);
return;
}
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 5d8b355..29badd9 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -18,6 +18,7 @@
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/spinlock.h>
+#include <linux/namei.h>
#include <asm/uaccess.h>
@@ -47,7 +48,7 @@ static int coda_readdir(struct file *file, void *buf, filldir_t filldir);
/* dentry ops */
static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd);
-static int coda_dentry_delete(struct dentry *);
+static int coda_dentry_delete(const struct dentry *);
/* support routines */
static int coda_venus_readdir(struct file *coda_file, void *buf,
@@ -125,7 +126,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc
return ERR_PTR(error);
exit:
- entry->d_op = &coda_dentry_operations;
+ d_set_d_op(entry, &coda_dentry_operations);
if (inode && (type & CODA_NOCACHE))
coda_flag_inode(inode, C_VATTR | C_PURGE);
@@ -134,10 +135,13 @@ exit:
}
-int coda_permission(struct inode *inode, int mask)
+int coda_permission(struct inode *inode, int mask, unsigned int flags)
{
int error;
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
if (!mask)
@@ -541,9 +545,13 @@ out:
/* called when a cache lookup succeeds */
static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd)
{
- struct inode *inode = de->d_inode;
+ struct inode *inode;
struct coda_inode_info *cii;
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ inode = de->d_inode;
if (!inode || coda_isroot(inode))
goto out;
if (is_bad_inode(inode))
@@ -559,7 +567,7 @@ static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd)
if (cii->c_flags & C_FLUSH)
coda_flag_inode_children(inode, C_FLUSH);
- if (atomic_read(&de->d_count) > 1)
+ if (de->d_count > 1)
/* pretend it's valid, but don't change the flags */
goto out;
@@ -577,7 +585,7 @@ out:
* This is the callback from dput() when d_count is going to 0.
* We use this to unhash dentries with bad inodes.
*/
-static int coda_dentry_delete(struct dentry * dentry)
+static int coda_dentry_delete(const struct dentry * dentry)
{
int flags;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 7993b96..f065a5d 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -45,7 +45,7 @@ static struct kmem_cache * coda_inode_cachep;
static struct inode *coda_alloc_inode(struct super_block *sb)
{
struct coda_inode_info *ei;
- ei = (struct coda_inode_info *)kmem_cache_alloc(coda_inode_cachep, GFP_KERNEL);
+ ei = kmem_cache_alloc(coda_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
memset(&ei->c_fid, 0, sizeof(struct CodaFid));
@@ -56,11 +56,18 @@ static struct inode *coda_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void coda_destroy_inode(struct inode *inode)
+static void coda_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(coda_inode_cachep, ITOC(inode));
}
+static void coda_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, coda_i_callback);
+}
+
static void init_once(void *foo)
{
struct coda_inode_info *ei = (struct coda_inode_info *) foo;
@@ -306,16 +313,16 @@ static int coda_statfs(struct dentry *dentry, struct kstatfs *buf)
/* init_coda: used by filesystems.c to register coda */
-static int coda_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *coda_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_nodev(fs_type, flags, data, coda_fill_super, mnt);
+ return mount_nodev(fs_type, flags, data, coda_fill_super);
}
struct file_system_type coda_fs_type = {
.owner = THIS_MODULE,
.name = "coda",
- .get_sb = coda_get_sb,
+ .mount = coda_mount,
.kill_sb = kill_anon_super,
.fs_flags = FS_BINARY_MOUNTDATA,
};
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 2fd89b5..741f0bd 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -24,7 +24,7 @@
#include <linux/coda_psdev.h>
/* pioctl ops */
-static int coda_ioctl_permission(struct inode *inode, int mask);
+static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags);
static long coda_pioctl(struct file *filp, unsigned int cmd,
unsigned long user_data);
@@ -41,8 +41,10 @@ const struct file_operations coda_ioctl_operations = {
};
/* the coda pioctl inode ops */
-static int coda_ioctl_permission(struct inode *inode, int mask)
+static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags)
{
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
return (mask & MAY_EXEC) ? -EACCES : 0;
}
diff --git a/fs/compat.c b/fs/compat.c
index f03abda..eb1740a 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -29,8 +29,6 @@
#include <linux/vfs.h>
#include <linux/ioctl.h>
#include <linux/init.h>
-#include <linux/smb.h>
-#include <linux/smb_mount.h>
#include <linux/ncp_mount.h>
#include <linux/nfs4_mount.h>
#include <linux/syscalls.h>
@@ -51,6 +49,7 @@
#include <linux/eventpoll.h>
#include <linux/fs_struct.h>
#include <linux/slab.h>
+#include <linux/pagemap.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -608,14 +607,14 @@ ssize_t compat_rw_copy_check_uvector(int type,
/*
* Single unix specification:
* We should -EINVAL if an element length is not >= 0 and fitting an
- * ssize_t. The total length is fitting an ssize_t
+ * ssize_t.
*
- * Be careful here because iov_len is a size_t not an ssize_t
+ * In Linux, the total length is limited to MAX_RW_COUNT, there is
+ * no overflow possibility.
*/
tot_len = 0;
ret = -EINVAL;
for (seg = 0; seg < nr_segs; seg++) {
- compat_ssize_t tmp = tot_len;
compat_uptr_t buf;
compat_ssize_t len;
@@ -626,13 +625,13 @@ ssize_t compat_rw_copy_check_uvector(int type,
}
if (len < 0) /* size_t not fitting in compat_ssize_t .. */
goto out;
- tot_len += len;
- if (tot_len < tmp) /* maths overflow on the compat_ssize_t */
- goto out;
if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
ret = -EFAULT;
goto out;
}
+ if (len > MAX_RW_COUNT - tot_len)
+ len = MAX_RW_COUNT - tot_len;
+ tot_len += len;
iov->iov_base = compat_ptr(buf);
iov->iov_len = (compat_size_t) len;
uvector++;
@@ -745,30 +744,6 @@ static void *do_ncp_super_data_conv(void *raw_data)
return raw_data;
}
-struct compat_smb_mount_data {
- compat_int_t version;
- __compat_uid_t mounted_uid;
- __compat_uid_t uid;
- __compat_gid_t gid;
- compat_mode_t file_mode;
- compat_mode_t dir_mode;
-};
-
-static void *do_smb_super_data_conv(void *raw_data)
-{
- struct smb_mount_data *s = raw_data;
- struct compat_smb_mount_data *c_s = raw_data;
-
- if (c_s->version != SMB_MOUNT_OLDVERSION)
- goto out;
- s->dir_mode = c_s->dir_mode;
- s->file_mode = c_s->file_mode;
- s->gid = c_s->gid;
- s->uid = c_s->uid;
- s->mounted_uid = c_s->mounted_uid;
- out:
- return raw_data;
-}
struct compat_nfs_string {
compat_uint_t len;
@@ -835,7 +810,6 @@ static int do_nfs4_super_data_conv(void *raw_data)
return 0;
}
-#define SMBFS_NAME "smbfs"
#define NCPFS_NAME "ncpfs"
#define NFS4_NAME "nfs4"
@@ -870,9 +844,7 @@ asmlinkage long compat_sys_mount(const char __user * dev_name,
retval = -EINVAL;
if (kernel_type && data_page) {
- if (!strcmp(kernel_type, SMBFS_NAME)) {
- do_smb_super_data_conv((void *)data_page);
- } else if (!strcmp(kernel_type, NCPFS_NAME)) {
+ if (!strcmp(kernel_type, NCPFS_NAME)) {
do_ncp_super_data_conv((void *)data_page);
} else if (!strcmp(kernel_type, NFS4_NAME)) {
if (do_nfs4_super_data_conv((void *) data_page))
@@ -1378,6 +1350,10 @@ static int compat_count(compat_uptr_t __user *argv, int max)
argv++;
if (i++ >= max)
return -E2BIG;
+
+ if (fatal_signal_pending(current))
+ return -ERESTARTNOHAND;
+ cond_resched();
}
}
return i;
@@ -1419,6 +1395,12 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
while (len > 0) {
int offset, bytes_to_copy;
+ if (fatal_signal_pending(current)) {
+ ret = -ERESTARTNOHAND;
+ goto out;
+ }
+ cond_resched();
+
offset = pos % PAGE_SIZE;
if (offset == 0)
offset = PAGE_SIZE;
@@ -1435,18 +1417,8 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
struct page *page;
-#ifdef CONFIG_STACK_GROWSUP
- ret = expand_stack_downwards(bprm->vma, pos);
- if (ret < 0) {
- /* We've exceed the stack rlimit. */
- ret = -E2BIG;
- goto out;
- }
-#endif
- ret = get_user_pages(current, bprm->mm, pos,
- 1, 1, 1, &page, NULL);
- if (ret <= 0) {
- /* We've exceed the stack rlimit. */
+ page = get_arg_page(bprm, pos, 1);
+ if (!page) {
ret = -E2BIG;
goto out;
}
@@ -1567,8 +1539,10 @@ int compat_do_execve(char * filename,
return retval;
out:
- if (bprm->mm)
+ if (bprm->mm) {
+ acct_arg_size(bprm, 0);
mmput(bprm->mm);
+ }
out_file:
if (bprm->file) {
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index d0ad09d..61abb63 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -19,7 +19,6 @@
#include <linux/compiler.h>
#include <linux/sched.h>
#include <linux/smp.h>
-#include <linux/smp_lock.h>
#include <linux/ioctl.h>
#include <linux/if.h>
#include <linux/if_bridge.h>
@@ -43,10 +42,9 @@
#include <linux/tty.h>
#include <linux/vt_kern.h>
#include <linux/fb.h>
-#include <linux/videodev.h>
+#include <linux/videodev2.h>
#include <linux/netdevice.h>
#include <linux/raw.h>
-#include <linux/smb_fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/rtc.h>
@@ -558,25 +556,6 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp)
#endif /* CONFIG_BLOCK */
-static int do_smb_getmountuid(unsigned int fd, unsigned int cmd,
- compat_uid_t __user *argp)
-{
- mm_segment_t old_fs = get_fs();
- __kernel_uid_t kuid;
- int err;
-
- cmd = SMB_IOC_GETMOUNTUID;
-
- set_fs(KERNEL_DS);
- err = sys_ioctl(fd, cmd, (unsigned long)&kuid);
- set_fs(old_fs);
-
- if (err >= 0)
- err = put_user(kuid, argp);
-
- return err;
-}
-
/* Bluetooth ioctls */
#define HCIUARTSETPROTO _IOW('U', 200, int)
#define HCIUARTGETPROTO _IOR('U', 201, int)
@@ -857,6 +836,7 @@ COMPATIBLE_IOCTL(TCSETSW)
COMPATIBLE_IOCTL(TCSETSF)
COMPATIBLE_IOCTL(TIOCLINUX)
COMPATIBLE_IOCTL(TIOCSBRK)
+COMPATIBLE_IOCTL(TIOCGDEV)
COMPATIBLE_IOCTL(TIOCCBRK)
COMPATIBLE_IOCTL(TIOCGSID)
COMPATIBLE_IOCTL(TIOCGICOUNT)
@@ -1199,8 +1179,9 @@ COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE5)
COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS)
COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS)
COMPATIBLE_IOCTL(OSS_GETVERSION)
-/* SMB ioctls which do not need any translations */
-COMPATIBLE_IOCTL(SMB_IOC_NEWCONN)
+/* Raw devices */
+COMPATIBLE_IOCTL(RAW_SETBIND)
+COMPATIBLE_IOCTL(RAW_GETBIND)
/* Watchdog */
COMPATIBLE_IOCTL(WDIOC_GETSUPPORT)
COMPATIBLE_IOCTL(WDIOC_GETSTATUS)
@@ -1458,10 +1439,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
case MTIOCPOS32:
return mt_ioctl_trans(fd, cmd, argp);
#endif
- /* One SMB ioctl needs translations. */
-#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
- case SMB_IOC_GETMOUNTUID_32:
- return do_smb_getmountuid(fd, cmd, argp);
/* Serial */
case TIOCGSERIAL:
case TIOCSSERIAL:
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index da6061a..026cf68 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -120,7 +120,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry
{
struct config_item * item = NULL;
- spin_lock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
if (!d_unhashed(dentry)) {
struct configfs_dirent * sd = dentry->d_fsdata;
if (sd->s_type & CONFIGFS_ITEM_LINK) {
@@ -129,7 +129,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry
} else
item = config_item_get(sd->s_element);
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
return item;
}
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 0b502f8..36637a8 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -67,7 +67,7 @@ static void configfs_d_iput(struct dentry * dentry,
* We _must_ delete our dentries on last dput, as the chain-to-parent
* behavior is required to clear the parents of default_groups.
*/
-static int configfs_d_delete(struct dentry *dentry)
+static int configfs_d_delete(const struct dentry *dentry)
{
return 1;
}
@@ -232,10 +232,8 @@ int configfs_make_dirent(struct configfs_dirent * parent_sd,
sd->s_mode = mode;
sd->s_dentry = dentry;
- if (dentry) {
+ if (dentry)
dentry->d_fsdata = configfs_get(sd);
- dentry->d_op = &configfs_dentry_ops;
- }
return 0;
}
@@ -278,7 +276,6 @@ static int create_dir(struct config_item * k, struct dentry * p,
error = configfs_create(d, mode, init_dir);
if (!error) {
inc_nlink(p->d_inode);
- (d)->d_op = &configfs_dentry_ops;
} else {
struct configfs_dirent *sd = d->d_fsdata;
if (sd) {
@@ -371,9 +368,7 @@ int configfs_create_link(struct configfs_symlink *sl,
CONFIGFS_ITEM_LINK);
if (!err) {
err = configfs_create(dentry, mode, init_symlink);
- if (!err)
- dentry->d_op = &configfs_dentry_ops;
- else {
+ if (err) {
struct configfs_dirent *sd = dentry->d_fsdata;
if (sd) {
spin_lock(&configfs_dirent_lock);
@@ -399,8 +394,7 @@ static void remove_dir(struct dentry * d)
if (d->d_inode)
simple_rmdir(parent->d_inode,d);
- pr_debug(" o %s removing done (%d)\n",d->d_name.name,
- atomic_read(&d->d_count));
+ pr_debug(" o %s removing done (%d)\n",d->d_name.name, d->d_count);
dput(parent);
}
@@ -448,7 +442,7 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
return error;
}
- dentry->d_op = &configfs_dentry_ops;
+ d_set_d_op(dentry, &configfs_dentry_ops);
d_rehash(dentry);
return 0;
@@ -493,7 +487,11 @@ static struct dentry * configfs_lookup(struct inode *dir,
* If it doesn't exist and it isn't a NOT_PINNED item,
* it must be negative.
*/
- return simple_lookup(dir, dentry, nd);
+ if (dentry->d_name.len > NAME_MAX)
+ return ERR_PTR(-ENAMETOOLONG);
+ d_set_d_op(dentry, &configfs_dentry_ops);
+ d_add(dentry, NULL);
+ return NULL;
}
out:
@@ -685,6 +683,7 @@ static int create_default_group(struct config_group *parent_group,
ret = -ENOMEM;
child = d_alloc(parent, &name);
if (child) {
+ d_set_d_op(child, &configfs_dentry_ops);
d_add(child, NULL);
ret = configfs_attach_group(&parent_group->cg_item,
@@ -1682,6 +1681,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
err = -ENOMEM;
dentry = d_alloc(configfs_sb->s_root, &name);
if (dentry) {
+ d_set_d_op(dentry, &configfs_dentry_ops);
d_add(dentry, NULL);
err = configfs_attach_group(sd->s_element, &group->cg_item,
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 253476d..c83f476 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -250,18 +250,14 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
struct dentry * dentry = sd->s_dentry;
if (dentry) {
- spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
if (!(d_unhashed(dentry) && dentry->d_inode)) {
- dget_locked(dentry);
+ dget_dlock(dentry);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
simple_unlink(parent->d_inode, dentry);
- } else {
+ } else
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
- }
}
}
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 8c8d642..7d3607f 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -104,16 +104,16 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
return 0;
}
-static int configfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *configfs_do_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_single(fs_type, flags, data, configfs_fill_super, mnt);
+ return mount_single(fs_type, flags, data, configfs_fill_super);
}
static struct file_system_type configfs_fs_type = {
.owner = THIS_MODULE,
.name = "configfs",
- .get_sb = configfs_get_sb,
+ .mount = configfs_do_mount,
.kill_sb = kill_litter_super,
};
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 1e7a330..e141939 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -34,57 +34,81 @@ static const struct address_space_operations cramfs_aops;
static DEFINE_MUTEX(read_mutex);
-/* These two macros may change in future, to provide better st_ino
- semantics. */
-#define CRAMINO(x) (((x)->offset && (x)->size)?(x)->offset<<2:1)
+/* These macros may change in future, to provide better st_ino semantics. */
#define OFFSET(x) ((x)->i_ino)
-static void setup_inode(struct inode *inode, struct cramfs_inode * cramfs_inode)
+static unsigned long cramino(struct cramfs_inode *cino, unsigned int offset)
{
+ if (!cino->offset)
+ return offset + 1;
+ if (!cino->size)
+ return offset + 1;
+
+ /*
+ * The file mode test fixes buggy mkcramfs implementations where
+ * cramfs_inode->offset is set to a non zero value for entries
+ * which did not contain data, like devices node and fifos.
+ */
+ switch (cino->mode & S_IFMT) {
+ case S_IFREG:
+ case S_IFDIR:
+ case S_IFLNK:
+ return cino->offset << 2;
+ default:
+ break;
+ }
+ return offset + 1;
+}
+
+static struct inode *get_cramfs_inode(struct super_block *sb,
+ struct cramfs_inode *cramfs_inode, unsigned int offset)
+{
+ struct inode *inode;
static struct timespec zerotime;
+
+ inode = iget_locked(sb, cramino(cramfs_inode, offset));
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+ if (!(inode->i_state & I_NEW))
+ return inode;
+
+ switch (cramfs_inode->mode & S_IFMT) {
+ case S_IFREG:
+ inode->i_fop = &generic_ro_fops;
+ inode->i_data.a_ops = &cramfs_aops;
+ break;
+ case S_IFDIR:
+ inode->i_op = &cramfs_dir_inode_operations;
+ inode->i_fop = &cramfs_directory_operations;
+ break;
+ case S_IFLNK:
+ inode->i_op = &page_symlink_inode_operations;
+ inode->i_data.a_ops = &cramfs_aops;
+ break;
+ default:
+ init_special_inode(inode, cramfs_inode->mode,
+ old_decode_dev(cramfs_inode->size));
+ }
+
inode->i_mode = cramfs_inode->mode;
inode->i_uid = cramfs_inode->uid;
- inode->i_size = cramfs_inode->size;
- inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
inode->i_gid = cramfs_inode->gid;
+
+ /* if the lower 2 bits are zero, the inode contains data */
+ if (!(inode->i_ino & 3)) {
+ inode->i_size = cramfs_inode->size;
+ inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
+ }
+
/* Struct copy intentional */
inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
/* inode->i_nlink is left 1 - arguably wrong for directories,
but it's the best we can do without reading the directory
contents. 1 yields the right result in GNU find, even
without -noleaf option. */
- if (S_ISREG(inode->i_mode)) {
- inode->i_fop = &generic_ro_fops;
- inode->i_data.a_ops = &cramfs_aops;
- } else if (S_ISDIR(inode->i_mode)) {
- inode->i_op = &cramfs_dir_inode_operations;
- inode->i_fop = &cramfs_directory_operations;
- } else if (S_ISLNK(inode->i_mode)) {
- inode->i_op = &page_symlink_inode_operations;
- inode->i_data.a_ops = &cramfs_aops;
- } else {
- init_special_inode(inode, inode->i_mode,
- old_decode_dev(cramfs_inode->size));
- }
-}
-static struct inode *get_cramfs_inode(struct super_block *sb,
- struct cramfs_inode * cramfs_inode)
-{
- struct inode *inode;
- if (CRAMINO(cramfs_inode) == 1) {
- inode = new_inode(sb);
- if (inode) {
- inode->i_ino = 1;
- setup_inode(inode, cramfs_inode);
- }
- } else {
- inode = iget_locked(sb, CRAMINO(cramfs_inode));
- if (inode && (inode->i_state & I_NEW)) {
- setup_inode(inode, cramfs_inode);
- unlock_new_inode(inode);
- }
- }
+ unlock_new_inode(inode);
+
return inode;
}
@@ -265,6 +289,9 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
printk(KERN_ERR "cramfs: root is not a directory\n");
goto out;
}
+ /* correct strange, hard-coded permissions of mkcramfs */
+ super.root.mode |= (S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
+
root_offset = super.root.offset << 2;
if (super.flags & CRAMFS_FLAG_FSID_VERSION_2) {
sbi->size=super.size;
@@ -289,7 +316,7 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
/* Set it all up.. */
sb->s_op = &cramfs_ops;
- root = get_cramfs_inode(sb, &super.root);
+ root = get_cramfs_inode(sb, &super.root, 0);
if (!root)
goto out;
sb->s_root = d_alloc_root(root);
@@ -365,7 +392,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
*/
namelen = de->namelen << 2;
memcpy(buf, name, namelen);
- ino = CRAMINO(de);
+ ino = cramino(de, OFFSET(inode) + offset);
mode = de->mode;
mutex_unlock(&read_mutex);
nextoffset = offset + sizeof(*de) + namelen;
@@ -404,8 +431,9 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, s
struct cramfs_inode *de;
char *name;
int namelen, retval;
+ int dir_off = OFFSET(dir) + offset;
- de = cramfs_read(dir->i_sb, OFFSET(dir) + offset, sizeof(*de)+CRAMFS_MAXPATHLEN);
+ de = cramfs_read(dir->i_sb, dir_off, sizeof(*de)+CRAMFS_MAXPATHLEN);
name = (char *)(de+1);
/* Try to take advantage of sorted directories */
@@ -436,7 +464,7 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, s
if (!retval) {
struct cramfs_inode entry = *de;
mutex_unlock(&read_mutex);
- d_add(dentry, get_cramfs_inode(dir->i_sb, &entry));
+ d_add(dentry, get_cramfs_inode(dir->i_sb, &entry, dir_off));
return NULL;
}
/* else (retval < 0) */
@@ -533,17 +561,16 @@ static const struct super_operations cramfs_ops = {
.statfs = cramfs_statfs,
};
-static int cramfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *cramfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, cramfs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, cramfs_fill_super);
}
static struct file_system_type cramfs_fs_type = {
.owner = THIS_MODULE,
.name = "cramfs",
- .get_sb = cramfs_get_sb,
+ .mount = cramfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/dcache.c b/fs/dcache.c
index 23702a9..5699d4c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -33,20 +33,58 @@
#include <linux/bootmem.h>
#include <linux/fs_struct.h>
#include <linux/hardirq.h>
+#include <linux/bit_spinlock.h>
+#include <linux/rculist_bl.h>
#include "internal.h"
+/*
+ * Usage:
+ * dcache->d_inode->i_lock protects:
+ * - i_dentry, d_alias, d_inode of aliases
+ * dcache_hash_bucket lock protects:
+ * - the dcache hash table
+ * s_anon bl list spinlock protects:
+ * - the s_anon list (see __d_drop)
+ * dcache_lru_lock protects:
+ * - the dcache lru lists and counters
+ * d_lock protects:
+ * - d_flags
+ * - d_name
+ * - d_lru
+ * - d_count
+ * - d_unhashed()
+ * - d_parent and d_subdirs
+ * - childrens' d_child and d_parent
+ * - d_alias, d_inode
+ *
+ * Ordering:
+ * dentry->d_inode->i_lock
+ * dentry->d_lock
+ * dcache_lru_lock
+ * dcache_hash_bucket lock
+ * s_anon lock
+ *
+ * If there is an ancestor relationship:
+ * dentry->d_parent->...->d_parent->d_lock
+ * ...
+ * dentry->d_parent->d_lock
+ * dentry->d_lock
+ *
+ * If no ancestor relationship:
+ * if (dentry1 < dentry2)
+ * dentry1->d_lock
+ * dentry2->d_lock
+ */
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
- __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
-EXPORT_SYMBOL(dcache_lock);
+EXPORT_SYMBOL(rename_lock);
static struct kmem_cache *dentry_cache __read_mostly;
-#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
-
/*
* This is the single most critical data structure when it comes
* to the dcache: the hashtable for lookups. Somebody should try
@@ -60,22 +98,51 @@ static struct kmem_cache *dentry_cache __read_mostly;
static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;
-static struct hlist_head *dentry_hashtable __read_mostly;
+
+struct dcache_hash_bucket {
+ struct hlist_bl_head head;
+};
+static struct dcache_hash_bucket *dentry_hashtable __read_mostly;
+
+static inline struct dcache_hash_bucket *d_hash(struct dentry *parent,
+ unsigned long hash)
+{
+ hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
+ hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
+ return dentry_hashtable + (hash & D_HASHMASK);
+}
+
+static inline void spin_lock_bucket(struct dcache_hash_bucket *b)
+{
+ bit_spin_lock(0, (unsigned long *)&b->head.first);
+}
+
+static inline void spin_unlock_bucket(struct dcache_hash_bucket *b)
+{
+ __bit_spin_unlock(0, (unsigned long *)&b->head.first);
+}
/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
.age_limit = 45,
};
-static struct percpu_counter nr_dentry __cacheline_aligned_in_smp;
-static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp;
+static DEFINE_PER_CPU(unsigned int, nr_dentry);
#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
+static int get_nr_dentry(void)
+{
+ int i;
+ int sum = 0;
+ for_each_possible_cpu(i)
+ sum += per_cpu(nr_dentry, i);
+ return sum < 0 ? 0 : sum;
+}
+
int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
size_t *lenp, loff_t *ppos)
{
- dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry);
- dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
+ dentry_stat.nr_dentry = get_nr_dentry();
return proc_dointvec(table, write, buffer, lenp, ppos);
}
#endif
@@ -91,35 +158,50 @@ static void __d_free(struct rcu_head *head)
}
/*
- * no dcache_lock, please.
+ * no locks, please.
*/
static void d_free(struct dentry *dentry)
{
- percpu_counter_dec(&nr_dentry);
+ BUG_ON(dentry->d_count);
+ this_cpu_dec(nr_dentry);
if (dentry->d_op && dentry->d_op->d_release)
dentry->d_op->d_release(dentry);
/* if dentry was never inserted into hash, immediate free is OK */
- if (hlist_unhashed(&dentry->d_hash))
+ if (hlist_bl_unhashed(&dentry->d_hash))
__d_free(&dentry->d_u.d_rcu);
else
call_rcu(&dentry->d_u.d_rcu, __d_free);
}
+/**
+ * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
+ * After this call, in-progress rcu-walk path lookup will fail. This
+ * should be called after unhashing, and after changing d_inode (if
+ * the dentry has not already been unhashed).
+ */
+static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
+{
+ assert_spin_locked(&dentry->d_lock);
+ /* Go through a barrier */
+ write_seqcount_barrier(&dentry->d_seq);
+}
+
/*
* Release the dentry's inode, using the filesystem
- * d_iput() operation if defined.
+ * d_iput() operation if defined. Dentry has no refcount
+ * and is unhashed.
*/
static void dentry_iput(struct dentry * dentry)
__releases(dentry->d_lock)
- __releases(dcache_lock)
+ __releases(dentry->d_inode->i_lock)
{
struct inode *inode = dentry->d_inode;
if (inode) {
dentry->d_inode = NULL;
list_del_init(&dentry->d_alias);
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
if (!inode->i_nlink)
fsnotify_inoderemove(inode);
if (dentry->d_op && dentry->d_op->d_iput)
@@ -128,40 +210,72 @@ static void dentry_iput(struct dentry * dentry)
iput(inode);
} else {
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
}
}
/*
- * dentry_lru_(add|del|move_tail) must be called with dcache_lock held.
+ * Release the dentry's inode, using the filesystem
+ * d_iput() operation if defined. dentry remains in-use.
+ */
+static void dentry_unlink_inode(struct dentry * dentry)
+ __releases(dentry->d_lock)
+ __releases(dentry->d_inode->i_lock)
+{
+ struct inode *inode = dentry->d_inode;
+ dentry->d_inode = NULL;
+ list_del_init(&dentry->d_alias);
+ dentry_rcuwalk_barrier(dentry);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&inode->i_lock);
+ if (!inode->i_nlink)
+ fsnotify_inoderemove(inode);
+ if (dentry->d_op && dentry->d_op->d_iput)
+ dentry->d_op->d_iput(dentry, inode);
+ else
+ iput(inode);
+}
+
+/*
+ * dentry_lru_(add|del|move_tail) must be called with d_lock held.
*/
static void dentry_lru_add(struct dentry *dentry)
{
if (list_empty(&dentry->d_lru)) {
+ spin_lock(&dcache_lru_lock);
list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
dentry->d_sb->s_nr_dentry_unused++;
- percpu_counter_inc(&nr_dentry_unused);
+ dentry_stat.nr_unused++;
+ spin_unlock(&dcache_lru_lock);
}
}
+static void __dentry_lru_del(struct dentry *dentry)
+{
+ list_del_init(&dentry->d_lru);
+ dentry->d_sb->s_nr_dentry_unused--;
+ dentry_stat.nr_unused--;
+}
+
static void dentry_lru_del(struct dentry *dentry)
{
if (!list_empty(&dentry->d_lru)) {
- list_del_init(&dentry->d_lru);
- dentry->d_sb->s_nr_dentry_unused--;
- percpu_counter_dec(&nr_dentry_unused);
+ spin_lock(&dcache_lru_lock);
+ __dentry_lru_del(dentry);
+ spin_unlock(&dcache_lru_lock);
}
}
static void dentry_lru_move_tail(struct dentry *dentry)
{
+ spin_lock(&dcache_lru_lock);
if (list_empty(&dentry->d_lru)) {
list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
dentry->d_sb->s_nr_dentry_unused++;
- percpu_counter_inc(&nr_dentry_unused);
+ dentry_stat.nr_unused++;
} else {
list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
}
+ spin_unlock(&dcache_lru_lock);
}
/**
@@ -171,22 +285,115 @@ static void dentry_lru_move_tail(struct dentry *dentry)
* The dentry must already be unhashed and removed from the LRU.
*
* If this is the root of the dentry tree, return NULL.
+ *
+ * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
+ * d_kill.
*/
-static struct dentry *d_kill(struct dentry *dentry)
+static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
__releases(dentry->d_lock)
- __releases(dcache_lock)
+ __releases(parent->d_lock)
+ __releases(dentry->d_inode->i_lock)
{
- struct dentry *parent;
-
+ dentry->d_parent = NULL;
list_del(&dentry->d_u.d_child);
- /*drops the locks, at that point nobody can reach this dentry */
+ if (parent)
+ spin_unlock(&parent->d_lock);
dentry_iput(dentry);
+ /*
+ * dentry_iput drops the locks, at which point nobody (except
+ * transient RCU lookups) can reach this dentry.
+ */
+ d_free(dentry);
+ return parent;
+}
+
+/**
+ * d_drop - drop a dentry
+ * @dentry: dentry to drop
+ *
+ * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
+ * be found through a VFS lookup any more. Note that this is different from
+ * deleting the dentry - d_delete will try to mark the dentry negative if
+ * possible, giving a successful _negative_ lookup, while d_drop will
+ * just make the cache lookup fail.
+ *
+ * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
+ * reason (NFS timeouts or autofs deletes).
+ *
+ * __d_drop requires dentry->d_lock.
+ */
+void __d_drop(struct dentry *dentry)
+{
+ if (!(dentry->d_flags & DCACHE_UNHASHED)) {
+ if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) {
+ bit_spin_lock(0,
+ (unsigned long *)&dentry->d_sb->s_anon.first);
+ dentry->d_flags |= DCACHE_UNHASHED;
+ hlist_bl_del_init(&dentry->d_hash);
+ __bit_spin_unlock(0,
+ (unsigned long *)&dentry->d_sb->s_anon.first);
+ } else {
+ struct dcache_hash_bucket *b;
+ b = d_hash(dentry->d_parent, dentry->d_name.hash);
+ spin_lock_bucket(b);
+ /*
+ * We may not actually need to put DCACHE_UNHASHED
+ * manipulations under the hash lock, but follow
+ * the principle of least surprise.
+ */
+ dentry->d_flags |= DCACHE_UNHASHED;
+ hlist_bl_del_rcu(&dentry->d_hash);
+ spin_unlock_bucket(b);
+ dentry_rcuwalk_barrier(dentry);
+ }
+ }
+}
+EXPORT_SYMBOL(__d_drop);
+
+void d_drop(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
+}
+EXPORT_SYMBOL(d_drop);
+
+/*
+ * Finish off a dentry we've decided to kill.
+ * dentry->d_lock must be held, returns with it unlocked.
+ * If ref is non-zero, then decrement the refcount too.
+ * Returns dentry requiring refcount drop, or NULL if we're done.
+ */
+static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
+ __releases(dentry->d_lock)
+{
+ struct inode *inode;
+ struct dentry *parent;
+
+ inode = dentry->d_inode;
+ if (inode && !spin_trylock(&inode->i_lock)) {
+relock:
+ spin_unlock(&dentry->d_lock);
+ cpu_relax();
+ return dentry; /* try again with same dentry */
+ }
if (IS_ROOT(dentry))
parent = NULL;
else
parent = dentry->d_parent;
- d_free(dentry);
- return parent;
+ if (parent && !spin_trylock(&parent->d_lock)) {
+ if (inode)
+ spin_unlock(&inode->i_lock);
+ goto relock;
+ }
+
+ if (ref)
+ dentry->d_count--;
+ /* if dentry was on the d_lru list delete it from there */
+ dentry_lru_del(dentry);
+ /* if it was on the hash then remove it */
+ __d_drop(dentry);
+ return d_kill(dentry, parent);
}
/*
@@ -214,34 +421,26 @@ static struct dentry *d_kill(struct dentry *dentry)
* call the dentry unlink method as well as removing it from the queues and
* releasing its resources. If the parent dentries were scheduled for release
* they too may now get deleted.
- *
- * no dcache lock, please.
*/
-
void dput(struct dentry *dentry)
{
if (!dentry)
return;
repeat:
- if (atomic_read(&dentry->d_count) == 1)
+ if (dentry->d_count == 1)
might_sleep();
- if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
- return;
-
spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count)) {
+ BUG_ON(!dentry->d_count);
+ if (dentry->d_count > 1) {
+ dentry->d_count--;
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
return;
}
- /*
- * AV: ->d_delete() is _NOT_ allowed to block now.
- */
- if (dentry->d_op && dentry->d_op->d_delete) {
+ if (dentry->d_flags & DCACHE_OP_DELETE) {
if (dentry->d_op->d_delete(dentry))
- goto unhash_it;
+ goto kill_it;
}
/* Unreachable? Get rid of it */
@@ -252,16 +451,12 @@ repeat:
dentry->d_flags |= DCACHE_REFERENCED;
dentry_lru_add(dentry);
- spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
+ dentry->d_count--;
+ spin_unlock(&dentry->d_lock);
return;
-unhash_it:
- __d_drop(dentry);
kill_it:
- /* if dentry was on the d_lru list delete it from there */
- dentry_lru_del(dentry);
- dentry = d_kill(dentry);
+ dentry = dentry_kill(dentry, 1);
if (dentry)
goto repeat;
}
@@ -284,9 +479,9 @@ int d_invalidate(struct dentry * dentry)
/*
* If it's already been dropped, return OK.
*/
- spin_lock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
if (d_unhashed(dentry)) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
return 0;
}
/*
@@ -294,9 +489,9 @@ int d_invalidate(struct dentry * dentry)
* to get rid of unused child entries.
*/
if (!list_empty(&dentry->d_subdirs)) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
shrink_dcache_parent(dentry);
- spin_lock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
}
/*
@@ -309,35 +504,61 @@ int d_invalidate(struct dentry * dentry)
* we might still populate it if it was a
* working directory or similar).
*/
- spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count) > 1) {
+ if (dentry->d_count > 1) {
if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
return -EBUSY;
}
}
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
return 0;
}
EXPORT_SYMBOL(d_invalidate);
-/* This should be called _only_ with dcache_lock held */
-static inline struct dentry * __dget_locked(struct dentry *dentry)
+/* This must be called with d_lock held */
+static inline void __dget_dlock(struct dentry *dentry)
{
- atomic_inc(&dentry->d_count);
- dentry_lru_del(dentry);
- return dentry;
+ dentry->d_count++;
}
-struct dentry * dget_locked(struct dentry *dentry)
+static inline void __dget(struct dentry *dentry)
{
- return __dget_locked(dentry);
+ spin_lock(&dentry->d_lock);
+ __dget_dlock(dentry);
+ spin_unlock(&dentry->d_lock);
+}
+
+struct dentry *dget_parent(struct dentry *dentry)
+{
+ struct dentry *ret;
+
+repeat:
+ /*
+ * Don't need rcu_dereference because we re-check it was correct under
+ * the lock.
+ */
+ rcu_read_lock();
+ ret = dentry->d_parent;
+ if (!ret) {
+ rcu_read_unlock();
+ goto out;
+ }
+ spin_lock(&ret->d_lock);
+ if (unlikely(ret != dentry->d_parent)) {
+ spin_unlock(&ret->d_lock);
+ rcu_read_unlock();
+ goto repeat;
+ }
+ rcu_read_unlock();
+ BUG_ON(!ret->d_count);
+ ret->d_count++;
+ spin_unlock(&ret->d_lock);
+out:
+ return ret;
}
-EXPORT_SYMBOL(dget_locked);
+EXPORT_SYMBOL(dget_parent);
/**
* d_find_alias - grab a hashed alias of inode
@@ -355,42 +576,51 @@ EXPORT_SYMBOL(dget_locked);
* any other hashed alias over that one unless @want_discon is set,
* in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
*/
-
-static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
+static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
{
- struct list_head *head, *next, *tmp;
- struct dentry *alias, *discon_alias=NULL;
+ struct dentry *alias, *discon_alias;
- head = &inode->i_dentry;
- next = inode->i_dentry.next;
- while (next != head) {
- tmp = next;
- next = tmp->next;
- prefetch(next);
- alias = list_entry(tmp, struct dentry, d_alias);
+again:
+ discon_alias = NULL;
+ list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+ spin_lock(&alias->d_lock);
if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
if (IS_ROOT(alias) &&
- (alias->d_flags & DCACHE_DISCONNECTED))
+ (alias->d_flags & DCACHE_DISCONNECTED)) {
discon_alias = alias;
- else if (!want_discon) {
- __dget_locked(alias);
+ } else if (!want_discon) {
+ __dget_dlock(alias);
+ spin_unlock(&alias->d_lock);
+ return alias;
+ }
+ }
+ spin_unlock(&alias->d_lock);
+ }
+ if (discon_alias) {
+ alias = discon_alias;
+ spin_lock(&alias->d_lock);
+ if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
+ if (IS_ROOT(alias) &&
+ (alias->d_flags & DCACHE_DISCONNECTED)) {
+ __dget_dlock(alias);
+ spin_unlock(&alias->d_lock);
return alias;
}
}
+ spin_unlock(&alias->d_lock);
+ goto again;
}
- if (discon_alias)
- __dget_locked(discon_alias);
- return discon_alias;
+ return NULL;
}
-struct dentry * d_find_alias(struct inode *inode)
+struct dentry *d_find_alias(struct inode *inode)
{
struct dentry *de = NULL;
if (!list_empty(&inode->i_dentry)) {
- spin_lock(&dcache_lock);
+ spin_lock(&inode->i_lock);
de = __d_find_alias(inode, 0);
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
}
return de;
}
@@ -404,54 +634,61 @@ void d_prune_aliases(struct inode *inode)
{
struct dentry *dentry;
restart:
- spin_lock(&dcache_lock);
+ spin_lock(&inode->i_lock);
list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
spin_lock(&dentry->d_lock);
- if (!atomic_read(&dentry->d_count)) {
- __dget_locked(dentry);
+ if (!dentry->d_count) {
+ __dget_dlock(dentry);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
dput(dentry);
goto restart;
}
spin_unlock(&dentry->d_lock);
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_prune_aliases);
/*
- * Throw away a dentry - free the inode, dput the parent. This requires that
- * the LRU list has already been removed.
+ * Try to throw away a dentry - free the inode, dput the parent.
+ * Requires dentry->d_lock is held, and dentry->d_count == 0.
+ * Releases dentry->d_lock.
*
- * Try to prune ancestors as well. This is necessary to prevent
- * quadratic behavior of shrink_dcache_parent(), but is also expected
- * to be beneficial in reducing dentry cache fragmentation.
+ * This may fail if locks cannot be acquired no problem, just try again.
*/
-static void prune_one_dentry(struct dentry * dentry)
+static void try_prune_one_dentry(struct dentry *dentry)
__releases(dentry->d_lock)
- __releases(dcache_lock)
- __acquires(dcache_lock)
{
- __d_drop(dentry);
- dentry = d_kill(dentry);
+ struct dentry *parent;
+ parent = dentry_kill(dentry, 0);
/*
- * Prune ancestors. Locking is simpler than in dput(),
- * because dcache_lock needs to be taken anyway.
+ * If dentry_kill returns NULL, we have nothing more to do.
+ * if it returns the same dentry, trylocks failed. In either
+ * case, just loop again.
+ *
+ * Otherwise, we need to prune ancestors too. This is necessary
+ * to prevent quadratic behavior of shrink_dcache_parent(), but
+ * is also expected to be beneficial in reducing dentry cache
+ * fragmentation.
*/
- spin_lock(&dcache_lock);
+ if (!parent)
+ return;
+ if (parent == dentry)
+ return;
+
+ /* Prune ancestors. */
+ dentry = parent;
while (dentry) {
- if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock))
+ spin_lock(&dentry->d_lock);
+ if (dentry->d_count > 1) {
+ dentry->d_count--;
+ spin_unlock(&dentry->d_lock);
return;
-
- if (dentry->d_op && dentry->d_op->d_delete)
- dentry->d_op->d_delete(dentry);
- dentry_lru_del(dentry);
- __d_drop(dentry);
- dentry = d_kill(dentry);
- spin_lock(&dcache_lock);
+ }
+ dentry = dentry_kill(dentry, 1);
}
}
@@ -459,24 +696,35 @@ static void shrink_dentry_list(struct list_head *list)
{
struct dentry *dentry;
- while (!list_empty(list)) {
- dentry = list_entry(list->prev, struct dentry, d_lru);
- dentry_lru_del(dentry);
+ rcu_read_lock();
+ for (;;) {
+ dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
+ if (&dentry->d_lru == list)
+ break; /* empty */
+ spin_lock(&dentry->d_lock);
+ if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
+ spin_unlock(&dentry->d_lock);
+ continue;
+ }
/*
* We found an inuse dentry which was not removed from
* the LRU because of laziness during lookup. Do not free
* it - just keep it off the LRU list.
*/
- spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count)) {
+ if (dentry->d_count) {
+ dentry_lru_del(dentry);
spin_unlock(&dentry->d_lock);
continue;
}
- prune_one_dentry(dentry);
- /* dentry->d_lock was dropped in prune_one_dentry() */
- cond_resched_lock(&dcache_lock);
+
+ rcu_read_unlock();
+
+ try_prune_one_dentry(dentry);
+
+ rcu_read_lock();
}
+ rcu_read_unlock();
}
/**
@@ -495,42 +743,44 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
LIST_HEAD(tmp);
int cnt = *count;
- spin_lock(&dcache_lock);
+relock:
+ spin_lock(&dcache_lru_lock);
while (!list_empty(&sb->s_dentry_lru)) {
dentry = list_entry(sb->s_dentry_lru.prev,
struct dentry, d_lru);
BUG_ON(dentry->d_sb != sb);
+ if (!spin_trylock(&dentry->d_lock)) {
+ spin_unlock(&dcache_lru_lock);
+ cpu_relax();
+ goto relock;
+ }
+
/*
* If we are honouring the DCACHE_REFERENCED flag and the
* dentry has this flag set, don't free it. Clear the flag
* and put it back on the LRU.
*/
- if (flags & DCACHE_REFERENCED) {
- spin_lock(&dentry->d_lock);
- if (dentry->d_flags & DCACHE_REFERENCED) {
- dentry->d_flags &= ~DCACHE_REFERENCED;
- list_move(&dentry->d_lru, &referenced);
- spin_unlock(&dentry->d_lock);
- cond_resched_lock(&dcache_lock);
- continue;
- }
+ if (flags & DCACHE_REFERENCED &&
+ dentry->d_flags & DCACHE_REFERENCED) {
+ dentry->d_flags &= ~DCACHE_REFERENCED;
+ list_move(&dentry->d_lru, &referenced);
spin_unlock(&dentry->d_lock);
+ } else {
+ list_move_tail(&dentry->d_lru, &tmp);
+ spin_unlock(&dentry->d_lock);
+ if (!--cnt)
+ break;
}
-
- list_move_tail(&dentry->d_lru, &tmp);
- if (!--cnt)
- break;
- cond_resched_lock(&dcache_lock);
+ cond_resched_lock(&dcache_lru_lock);
}
-
- *count = cnt;
- shrink_dentry_list(&tmp);
-
if (!list_empty(&referenced))
list_splice(&referenced, &sb->s_dentry_lru);
- spin_unlock(&dcache_lock);
+ spin_unlock(&dcache_lru_lock);
+ shrink_dentry_list(&tmp);
+
+ *count = cnt;
}
/**
@@ -546,13 +796,12 @@ static void prune_dcache(int count)
{
struct super_block *sb, *p = NULL;
int w_count;
- int unused = percpu_counter_sum_positive(&nr_dentry_unused);
+ int unused = dentry_stat.nr_unused;
int prune_ratio;
int pruned;
if (unused == 0 || count == 0)
return;
- spin_lock(&dcache_lock);
if (count >= unused)
prune_ratio = 1;
else
@@ -589,11 +838,9 @@ static void prune_dcache(int count)
if (down_read_trylock(&sb->s_umount)) {
if ((sb->s_root != NULL) &&
(!list_empty(&sb->s_dentry_lru))) {
- spin_unlock(&dcache_lock);
__shrink_dcache_sb(sb, &w_count,
DCACHE_REFERENCED);
pruned -= w_count;
- spin_lock(&dcache_lock);
}
up_read(&sb->s_umount);
}
@@ -609,7 +856,6 @@ static void prune_dcache(int count)
if (p)
__put_super(p);
spin_unlock(&sb_lock);
- spin_unlock(&dcache_lock);
}
/**
@@ -623,12 +869,14 @@ void shrink_dcache_sb(struct super_block *sb)
{
LIST_HEAD(tmp);
- spin_lock(&dcache_lock);
+ spin_lock(&dcache_lru_lock);
while (!list_empty(&sb->s_dentry_lru)) {
list_splice_init(&sb->s_dentry_lru, &tmp);
+ spin_unlock(&dcache_lru_lock);
shrink_dentry_list(&tmp);
+ spin_lock(&dcache_lru_lock);
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&dcache_lru_lock);
}
EXPORT_SYMBOL(shrink_dcache_sb);
@@ -645,10 +893,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
BUG_ON(!IS_ROOT(dentry));
/* detach this root from the system */
- spin_lock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
dentry_lru_del(dentry);
__d_drop(dentry);
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
for (;;) {
/* descend to the first leaf in the current subtree */
@@ -657,14 +905,16 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
/* this is a branch with children - detach all of them
* from the system in one go */
- spin_lock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
list_for_each_entry(loop, &dentry->d_subdirs,
d_u.d_child) {
+ spin_lock_nested(&loop->d_lock,
+ DENTRY_D_LOCK_NESTED);
dentry_lru_del(loop);
__d_drop(loop);
- cond_resched_lock(&dcache_lock);
+ spin_unlock(&loop->d_lock);
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
/* move to the first child */
dentry = list_entry(dentry->d_subdirs.next,
@@ -676,7 +926,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
do {
struct inode *inode;
- if (atomic_read(&dentry->d_count) != 0) {
+ if (dentry->d_count != 0) {
printk(KERN_ERR
"BUG: Dentry %p{i=%lx,n=%s}"
" still in use (%d)"
@@ -685,20 +935,23 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
dentry->d_inode ?
dentry->d_inode->i_ino : 0UL,
dentry->d_name.name,
- atomic_read(&dentry->d_count),
+ dentry->d_count,
dentry->d_sb->s_type->name,
dentry->d_sb->s_id);
BUG();
}
- if (IS_ROOT(dentry))
+ if (IS_ROOT(dentry)) {
parent = NULL;
- else {
+ list_del(&dentry->d_u.d_child);
+ } else {
parent = dentry->d_parent;
- atomic_dec(&parent->d_count);
+ spin_lock(&parent->d_lock);
+ parent->d_count--;
+ list_del(&dentry->d_u.d_child);
+ spin_unlock(&parent->d_lock);
}
- list_del(&dentry->d_u.d_child);
detached++;
inode = dentry->d_inode;
@@ -728,8 +981,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
/*
* destroy the dentries attached to a superblock on unmounting
- * - we don't need to use dentry->d_lock, and only need dcache_lock when
- * removing the dentry from the system lists and hashes because:
+ * - we don't need to use dentry->d_lock because:
* - the superblock is detached from all mountings and open files, so the
* dentry trees will not be rearranged by the VFS
* - s_umount is write-locked, so the memory pressure shrinker will ignore
@@ -746,11 +998,13 @@ void shrink_dcache_for_umount(struct super_block *sb)
dentry = sb->s_root;
sb->s_root = NULL;
- atomic_dec(&dentry->d_count);
+ spin_lock(&dentry->d_lock);
+ dentry->d_count--;
+ spin_unlock(&dentry->d_lock);
shrink_dcache_for_umount_subtree(dentry);
- while (!hlist_empty(&sb->s_anon)) {
- dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash);
+ while (!hlist_bl_empty(&sb->s_anon)) {
+ dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
shrink_dcache_for_umount_subtree(dentry);
}
}
@@ -768,15 +1022,20 @@ void shrink_dcache_for_umount(struct super_block *sb)
* Return true if the parent or its subdirectories contain
* a mount point
*/
-
int have_submounts(struct dentry *parent)
{
- struct dentry *this_parent = parent;
+ struct dentry *this_parent;
struct list_head *next;
+ unsigned seq;
+ int locked = 0;
+
+ seq = read_seqbegin(&rename_lock);
+again:
+ this_parent = parent;
- spin_lock(&dcache_lock);
if (d_mountpoint(parent))
goto positive;
+ spin_lock(&this_parent->d_lock);
repeat:
next = this_parent->d_subdirs.next;
resume:
@@ -784,27 +1043,65 @@ resume:
struct list_head *tmp = next;
struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
+
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
/* Have we found a mount point ? */
- if (d_mountpoint(dentry))
+ if (d_mountpoint(dentry)) {
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&this_parent->d_lock);
goto positive;
+ }
if (!list_empty(&dentry->d_subdirs)) {
+ spin_unlock(&this_parent->d_lock);
+ spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
this_parent = dentry;
+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
goto repeat;
}
+ spin_unlock(&dentry->d_lock);
}
/*
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- next = this_parent->d_u.d_child.next;
- this_parent = this_parent->d_parent;
+ struct dentry *tmp;
+ struct dentry *child;
+
+ tmp = this_parent->d_parent;
+ rcu_read_lock();
+ spin_unlock(&this_parent->d_lock);
+ child = this_parent;
+ this_parent = tmp;
+ spin_lock(&this_parent->d_lock);
+ /* might go back up the wrong parent if we have had a rename
+ * or deletion */
+ if (this_parent != child->d_parent ||
+ (!locked && read_seqretry(&rename_lock, seq))) {
+ spin_unlock(&this_parent->d_lock);
+ rcu_read_unlock();
+ goto rename_retry;
+ }
+ rcu_read_unlock();
+ next = child->d_u.d_child.next;
goto resume;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&this_parent->d_lock);
+ if (!locked && read_seqretry(&rename_lock, seq))
+ goto rename_retry;
+ if (locked)
+ write_sequnlock(&rename_lock);
return 0; /* No mount points found in tree */
positive:
- spin_unlock(&dcache_lock);
+ if (!locked && read_seqretry(&rename_lock, seq))
+ goto rename_retry;
+ if (locked)
+ write_sequnlock(&rename_lock);
return 1;
+
+rename_retry:
+ locked = 1;
+ write_seqlock(&rename_lock);
+ goto again;
}
EXPORT_SYMBOL(have_submounts);
@@ -824,11 +1121,16 @@ EXPORT_SYMBOL(have_submounts);
*/
static int select_parent(struct dentry * parent)
{
- struct dentry *this_parent = parent;
+ struct dentry *this_parent;
struct list_head *next;
+ unsigned seq;
int found = 0;
+ int locked = 0;
- spin_lock(&dcache_lock);
+ seq = read_seqbegin(&rename_lock);
+again:
+ this_parent = parent;
+ spin_lock(&this_parent->d_lock);
repeat:
next = this_parent->d_subdirs.next;
resume:
@@ -837,11 +1139,13 @@ resume:
struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+
/*
* move only zero ref count dentries to the end
* of the unused list for prune_dcache
*/
- if (!atomic_read(&dentry->d_count)) {
+ if (!dentry->d_count) {
dentry_lru_move_tail(dentry);
found++;
} else {
@@ -853,28 +1157,63 @@ resume:
* ensures forward progress). We'll be coming back to find
* the rest.
*/
- if (found && need_resched())
+ if (found && need_resched()) {
+ spin_unlock(&dentry->d_lock);
goto out;
+ }
/*
* Descend a level if the d_subdirs list is non-empty.
*/
if (!list_empty(&dentry->d_subdirs)) {
+ spin_unlock(&this_parent->d_lock);
+ spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
this_parent = dentry;
+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
goto repeat;
}
+
+ spin_unlock(&dentry->d_lock);
}
/*
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- next = this_parent->d_u.d_child.next;
- this_parent = this_parent->d_parent;
+ struct dentry *tmp;
+ struct dentry *child;
+
+ tmp = this_parent->d_parent;
+ rcu_read_lock();
+ spin_unlock(&this_parent->d_lock);
+ child = this_parent;
+ this_parent = tmp;
+ spin_lock(&this_parent->d_lock);
+ /* might go back up the wrong parent if we have had a rename
+ * or deletion */
+ if (this_parent != child->d_parent ||
+ (!locked && read_seqretry(&rename_lock, seq))) {
+ spin_unlock(&this_parent->d_lock);
+ rcu_read_unlock();
+ goto rename_retry;
+ }
+ rcu_read_unlock();
+ next = child->d_u.d_child.next;
goto resume;
}
out:
- spin_unlock(&dcache_lock);
+ spin_unlock(&this_parent->d_lock);
+ if (!locked && read_seqretry(&rename_lock, seq))
+ goto rename_retry;
+ if (locked)
+ write_sequnlock(&rename_lock);
return found;
+
+rename_retry:
+ if (found)
+ return found;
+ locked = 1;
+ write_seqlock(&rename_lock);
+ goto again;
}
/**
@@ -908,16 +1247,13 @@ EXPORT_SYMBOL(shrink_dcache_parent);
*/
static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
- int nr_unused;
-
if (nr) {
if (!(gfp_mask & __GFP_FS))
return -1;
prune_dcache(nr);
}
- nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
- return (nr_unused / 100) * sysctl_vfs_cache_pressure;
+ return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}
static struct shrinker dcache_shrinker = {
@@ -960,38 +1296,52 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
memcpy(dname, name->name, name->len);
dname[name->len] = 0;
- atomic_set(&dentry->d_count, 1);
+ dentry->d_count = 1;
dentry->d_flags = DCACHE_UNHASHED;
spin_lock_init(&dentry->d_lock);
+ seqcount_init(&dentry->d_seq);
dentry->d_inode = NULL;
dentry->d_parent = NULL;
dentry->d_sb = NULL;
dentry->d_op = NULL;
dentry->d_fsdata = NULL;
- dentry->d_mounted = 0;
- INIT_HLIST_NODE(&dentry->d_hash);
+ INIT_HLIST_BL_NODE(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
INIT_LIST_HEAD(&dentry->d_alias);
+ INIT_LIST_HEAD(&dentry->d_u.d_child);
if (parent) {
- dentry->d_parent = dget(parent);
+ spin_lock(&parent->d_lock);
+ /*
+ * don't need child lock because it is not subject
+ * to concurrency here
+ */
+ __dget_dlock(parent);
+ dentry->d_parent = parent;
dentry->d_sb = parent->d_sb;
- } else {
- INIT_LIST_HEAD(&dentry->d_u.d_child);
- }
-
- spin_lock(&dcache_lock);
- if (parent)
list_add(&dentry->d_u.d_child, &parent->d_subdirs);
- spin_unlock(&dcache_lock);
+ spin_unlock(&parent->d_lock);
+ }
- percpu_counter_inc(&nr_dentry);
+ this_cpu_inc(nr_dentry);
return dentry;
}
EXPORT_SYMBOL(d_alloc);
+struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
+{
+ struct dentry *dentry = d_alloc(NULL, name);
+ if (dentry) {
+ dentry->d_sb = sb;
+ dentry->d_parent = dentry;
+ dentry->d_flags |= DCACHE_DISCONNECTED;
+ }
+ return dentry;
+}
+EXPORT_SYMBOL(d_alloc_pseudo);
+
struct dentry *d_alloc_name(struct dentry *parent, const char *name)
{
struct qstr q;
@@ -1003,12 +1353,36 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
}
EXPORT_SYMBOL(d_alloc_name);
-/* the caller must hold dcache_lock */
+void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
+{
+ BUG_ON(dentry->d_op);
+ BUG_ON(dentry->d_flags & (DCACHE_OP_HASH |
+ DCACHE_OP_COMPARE |
+ DCACHE_OP_REVALIDATE |
+ DCACHE_OP_DELETE ));
+ dentry->d_op = op;
+ if (!op)
+ return;
+ if (op->d_hash)
+ dentry->d_flags |= DCACHE_OP_HASH;
+ if (op->d_compare)
+ dentry->d_flags |= DCACHE_OP_COMPARE;
+ if (op->d_revalidate)
+ dentry->d_flags |= DCACHE_OP_REVALIDATE;
+ if (op->d_delete)
+ dentry->d_flags |= DCACHE_OP_DELETE;
+
+}
+EXPORT_SYMBOL(d_set_d_op);
+
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
+ spin_lock(&dentry->d_lock);
if (inode)
list_add(&dentry->d_alias, &inode->i_dentry);
dentry->d_inode = inode;
+ dentry_rcuwalk_barrier(dentry);
+ spin_unlock(&dentry->d_lock);
fsnotify_d_instantiate(dentry, inode);
}
@@ -1030,9 +1404,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
void d_instantiate(struct dentry *entry, struct inode * inode)
{
BUG_ON(!list_empty(&entry->d_alias));
- spin_lock(&dcache_lock);
+ if (inode)
+ spin_lock(&inode->i_lock);
__d_instantiate(entry, inode);
- spin_unlock(&dcache_lock);
+ if (inode)
+ spin_unlock(&inode->i_lock);
security_d_instantiate(entry, inode);
}
EXPORT_SYMBOL(d_instantiate);
@@ -1069,15 +1445,18 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
list_for_each_entry(alias, &inode->i_dentry, d_alias) {
struct qstr *qstr = &alias->d_name;
+ /*
+ * Don't need alias->d_lock here, because aliases with
+ * d_parent == entry->d_parent are not subject to name or
+ * parent changes, because the parent inode i_mutex is held.
+ */
if (qstr->hash != hash)
continue;
if (alias->d_parent != entry->d_parent)
continue;
- if (qstr->len != len)
+ if (dentry_cmp(qstr->name, qstr->len, name, len))
continue;
- if (memcmp(qstr->name, name, len))
- continue;
- dget_locked(alias);
+ __dget(alias);
return alias;
}
@@ -1091,9 +1470,11 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
BUG_ON(!list_empty(&entry->d_alias));
- spin_lock(&dcache_lock);
+ if (inode)
+ spin_lock(&inode->i_lock);
result = __d_instantiate_unique(entry, inode);
- spin_unlock(&dcache_lock);
+ if (inode)
+ spin_unlock(&inode->i_lock);
if (!result) {
security_d_instantiate(entry, inode);
@@ -1134,14 +1515,6 @@ struct dentry * d_alloc_root(struct inode * root_inode)
}
EXPORT_SYMBOL(d_alloc_root);
-static inline struct hlist_head *d_hash(struct dentry *parent,
- unsigned long hash)
-{
- hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
- hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
- return dentry_hashtable + (hash & D_HASHMASK);
-}
-
/**
* d_obtain_alias - find or allocate a dentry for a given inode
* @inode: inode to allocate the dentry for
@@ -1182,10 +1555,11 @@ struct dentry *d_obtain_alias(struct inode *inode)
}
tmp->d_parent = tmp; /* make sure dput doesn't croak */
- spin_lock(&dcache_lock);
+
+ spin_lock(&inode->i_lock);
res = __d_find_alias(inode, 0);
if (res) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
dput(tmp);
goto out_iput;
}
@@ -1195,12 +1569,14 @@ struct dentry *d_obtain_alias(struct inode *inode)
tmp->d_sb = inode->i_sb;
tmp->d_inode = inode;
tmp->d_flags |= DCACHE_DISCONNECTED;
- tmp->d_flags &= ~DCACHE_UNHASHED;
list_add(&tmp->d_alias, &inode->i_dentry);
- hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon);
+ bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
+ tmp->d_flags &= ~DCACHE_UNHASHED;
+ hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
+ __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
spin_unlock(&tmp->d_lock);
+ spin_unlock(&inode->i_lock);
- spin_unlock(&dcache_lock);
return tmp;
out_iput:
@@ -1230,18 +1606,18 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
struct dentry *new = NULL;
if (inode && S_ISDIR(inode->i_mode)) {
- spin_lock(&dcache_lock);
+ spin_lock(&inode->i_lock);
new = __d_find_alias(inode, 1);
if (new) {
BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
security_d_instantiate(new, inode);
d_move(new, dentry);
iput(inode);
} else {
- /* already taking dcache_lock, so d_add() by hand */
+ /* already taking inode->i_lock, so d_add() by hand */
__d_instantiate(dentry, inode);
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
security_d_instantiate(dentry, inode);
d_rehash(dentry);
}
@@ -1314,10 +1690,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
* Negative dentry: instantiate it unless the inode is a directory and
* already has a dentry.
*/
- spin_lock(&dcache_lock);
+ spin_lock(&inode->i_lock);
if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
__d_instantiate(found, inode);
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
security_d_instantiate(found, inode);
return found;
}
@@ -1327,8 +1703,8 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
* reference to it, move it in place and use it.
*/
new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
- dget_locked(new);
- spin_unlock(&dcache_lock);
+ __dget(new);
+ spin_unlock(&inode->i_lock);
security_d_instantiate(found, inode);
d_move(new, found);
iput(inode);
@@ -1342,6 +1718,112 @@ err_out:
EXPORT_SYMBOL(d_add_ci);
/**
+ * __d_lookup_rcu - search for a dentry (racy, store-free)
+ * @parent: parent dentry
+ * @name: qstr of name we wish to find
+ * @seq: returns d_seq value at the point where the dentry was found
+ * @inode: returns dentry->d_inode when the inode was found valid.
+ * Returns: dentry, or NULL
+ *
+ * __d_lookup_rcu is the dcache lookup function for rcu-walk name
+ * resolution (store-free path walking) design described in
+ * Documentation/filesystems/path-lookup.txt.
+ *
+ * This is not to be used outside core vfs.
+ *
+ * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
+ * held, and rcu_read_lock held. The returned dentry must not be stored into
+ * without taking d_lock and checking d_seq sequence count against @seq
+ * returned here.
+ *
+ * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
+ * function.
+ *
+ * Alternatively, __d_lookup_rcu may be called again to look up the child of
+ * the returned dentry, so long as its parent's seqlock is checked after the
+ * child is looked up. Thus, an interlocking stepping of sequence lock checks
+ * is formed, giving integrity down the path walk.
+ */
+struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
+ unsigned *seq, struct inode **inode)
+{
+ unsigned int len = name->len;
+ unsigned int hash = name->hash;
+ const unsigned char *str = name->name;
+ struct dcache_hash_bucket *b = d_hash(parent, hash);
+ struct hlist_bl_node *node;
+ struct dentry *dentry;
+
+ /*
+ * Note: There is significant duplication with __d_lookup_rcu which is
+ * required to prevent single threaded performance regressions
+ * especially on architectures where smp_rmb (in seqcounts) are costly.
+ * Keep the two functions in sync.
+ */
+
+ /*
+ * The hash list is protected using RCU.
+ *
+ * Carefully use d_seq when comparing a candidate dentry, to avoid
+ * races with d_move().
+ *
+ * It is possible that concurrent renames can mess up our list
+ * walk here and result in missing our dentry, resulting in the
+ * false-negative result. d_lookup() protects against concurrent
+ * renames using rename_lock seqlock.
+ *
+ * See Documentation/vfs/dcache-locking.txt for more details.
+ */
+ hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
+ struct inode *i;
+ const char *tname;
+ int tlen;
+
+ if (dentry->d_name.hash != hash)
+ continue;
+
+seqretry:
+ *seq = read_seqcount_begin(&dentry->d_seq);
+ if (dentry->d_parent != parent)
+ continue;
+ if (d_unhashed(dentry))
+ continue;
+ tlen = dentry->d_name.len;
+ tname = dentry->d_name.name;
+ i = dentry->d_inode;
+ prefetch(tname);
+ if (i)
+ prefetch(i);
+ /*
+ * This seqcount check is required to ensure name and
+ * len are loaded atomically, so as not to walk off the
+ * edge of memory when walking. If we could load this
+ * atomically some other way, we could drop this check.
+ */
+ if (read_seqcount_retry(&dentry->d_seq, *seq))
+ goto seqretry;
+ if (parent->d_flags & DCACHE_OP_COMPARE) {
+ if (parent->d_op->d_compare(parent, *inode,
+ dentry, i,
+ tlen, tname, name))
+ continue;
+ } else {
+ if (dentry_cmp(tname, tlen, str, len))
+ continue;
+ }
+ /*
+ * No extra seqcount check is required after the name
+ * compare. The caller must perform a seqcount check in
+ * order to do anything useful with the returned dentry
+ * anyway.
+ */
+ *inode = i;
+ return dentry;
+ }
+ return NULL;
+}
+
+/**
* d_lookup - search for a dentry
* @parent: parent dentry
* @name: qstr of name we wish to find
@@ -1352,10 +1834,10 @@ EXPORT_SYMBOL(d_add_ci);
* dentry is returned. The caller must use dput to free the entry when it has
* finished using it. %NULL is returned if the dentry does not exist.
*/
-struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
+struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
{
- struct dentry * dentry = NULL;
- unsigned long seq;
+ struct dentry *dentry;
+ unsigned seq;
do {
seq = read_seqbegin(&rename_lock);
@@ -1367,7 +1849,7 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
}
EXPORT_SYMBOL(d_lookup);
-/*
+/**
* __d_lookup - search for a dentry (racy)
* @parent: parent dentry
* @name: qstr of name we wish to find
@@ -1382,17 +1864,24 @@ EXPORT_SYMBOL(d_lookup);
*
* __d_lookup callers must be commented.
*/
-struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
+struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
{
unsigned int len = name->len;
unsigned int hash = name->hash;
const unsigned char *str = name->name;
- struct hlist_head *head = d_hash(parent,hash);
+ struct dcache_hash_bucket *b = d_hash(parent, hash);
+ struct hlist_bl_node *node;
struct dentry *found = NULL;
- struct hlist_node *node;
struct dentry *dentry;
/*
+ * Note: There is significant duplication with __d_lookup_rcu which is
+ * required to prevent single threaded performance regressions
+ * especially on architectures where smp_rmb (in seqcounts) are costly.
+ * Keep the two functions in sync.
+ */
+
+ /*
* The hash list is protected using RCU.
*
* Take d_lock when comparing a candidate dentry, to avoid races
@@ -1407,25 +1896,16 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
*/
rcu_read_lock();
- hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
- struct qstr *qstr;
+ hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
+ const char *tname;
+ int tlen;
if (dentry->d_name.hash != hash)
continue;
- if (dentry->d_parent != parent)
- continue;
spin_lock(&dentry->d_lock);
-
- /*
- * Recheck the dentry after taking the lock - d_move may have
- * changed things. Don't bother checking the hash because
- * we're about to compare the whole name anyway.
- */
if (dentry->d_parent != parent)
goto next;
-
- /* non-existing due to RCU? */
if (d_unhashed(dentry))
goto next;
@@ -1433,18 +1913,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
* It is safe to compare names since d_move() cannot
* change the qstr (protected by d_lock).
*/
- qstr = &dentry->d_name;
- if (parent->d_op && parent->d_op->d_compare) {
- if (parent->d_op->d_compare(parent, qstr, name))
+ tlen = dentry->d_name.len;
+ tname = dentry->d_name.name;
+ if (parent->d_flags & DCACHE_OP_COMPARE) {
+ if (parent->d_op->d_compare(parent, parent->d_inode,
+ dentry, dentry->d_inode,
+ tlen, tname, name))
goto next;
} else {
- if (qstr->len != len)
- goto next;
- if (memcmp(qstr->name, str, len))
+ if (dentry_cmp(tname, tlen, str, len))
goto next;
}
- atomic_inc(&dentry->d_count);
+ dentry->d_count++;
found = dentry;
spin_unlock(&dentry->d_lock);
break;
@@ -1473,8 +1954,8 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
* routine may choose to leave the hash value unchanged.
*/
name->hash = full_name_hash(name->name, name->len);
- if (dir->d_op && dir->d_op->d_hash) {
- if (dir->d_op->d_hash(dir, name) < 0)
+ if (dir->d_flags & DCACHE_OP_HASH) {
+ if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
goto out;
}
dentry = d_lookup(dir, name);
@@ -1483,34 +1964,32 @@ out:
}
/**
- * d_validate - verify dentry provided from insecure source
+ * d_validate - verify dentry provided from insecure source (deprecated)
* @dentry: The dentry alleged to be valid child of @dparent
* @dparent: The parent dentry (known to be valid)
*
* An insecure source has sent us a dentry, here we verify it and dget() it.
* This is used by ncpfs in its readdir implementation.
* Zero is returned in the dentry is invalid.
+ *
+ * This function is slow for big directories, and deprecated, do not use it.
*/
-int d_validate(struct dentry *dentry, struct dentry *parent)
+int d_validate(struct dentry *dentry, struct dentry *dparent)
{
- struct hlist_head *head = d_hash(parent, dentry->d_name.hash);
- struct hlist_node *node;
- struct dentry *d;
-
- /* Check whether the ptr might be valid at all.. */
- if (!kmem_ptr_validate(dentry_cache, dentry))
- return 0;
- if (dentry->d_parent != parent)
- return 0;
+ struct dentry *child;
- rcu_read_lock();
- hlist_for_each_entry_rcu(d, node, head, d_hash) {
- if (d == dentry) {
- dget(dentry);
+ spin_lock(&dparent->d_lock);
+ list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
+ if (dentry == child) {
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+ __dget_dlock(dentry);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dparent->d_lock);
return 1;
}
}
- rcu_read_unlock();
+ spin_unlock(&dparent->d_lock);
+
return 0;
}
EXPORT_SYMBOL(d_validate);
@@ -1538,16 +2017,23 @@ EXPORT_SYMBOL(d_validate);
void d_delete(struct dentry * dentry)
{
+ struct inode *inode;
int isdir = 0;
/*
* Are we the only user?
*/
- spin_lock(&dcache_lock);
+again:
spin_lock(&dentry->d_lock);
- isdir = S_ISDIR(dentry->d_inode->i_mode);
- if (atomic_read(&dentry->d_count) == 1) {
+ inode = dentry->d_inode;
+ isdir = S_ISDIR(inode->i_mode);
+ if (dentry->d_count == 1) {
+ if (inode && !spin_trylock(&inode->i_lock)) {
+ spin_unlock(&dentry->d_lock);
+ cpu_relax();
+ goto again;
+ }
dentry->d_flags &= ~DCACHE_CANT_MOUNT;
- dentry_iput(dentry);
+ dentry_unlink_inode(dentry);
fsnotify_nameremove(dentry, isdir);
return;
}
@@ -1556,17 +2042,18 @@ void d_delete(struct dentry * dentry)
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
fsnotify_nameremove(dentry, isdir);
}
EXPORT_SYMBOL(d_delete);
-static void __d_rehash(struct dentry * entry, struct hlist_head *list)
+static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b)
{
-
+ BUG_ON(!d_unhashed(entry));
+ spin_lock_bucket(b);
entry->d_flags &= ~DCACHE_UNHASHED;
- hlist_add_head_rcu(&entry->d_hash, list);
+ hlist_bl_add_head_rcu(&entry->d_hash, &b->head);
+ spin_unlock_bucket(b);
}
static void _d_rehash(struct dentry * entry)
@@ -1583,25 +2070,39 @@ static void _d_rehash(struct dentry * entry)
void d_rehash(struct dentry * entry)
{
- spin_lock(&dcache_lock);
spin_lock(&entry->d_lock);
_d_rehash(entry);
spin_unlock(&entry->d_lock);
- spin_unlock(&dcache_lock);
}
EXPORT_SYMBOL(d_rehash);
-/*
- * When switching names, the actual string doesn't strictly have to
- * be preserved in the target - because we're dropping the target
- * anyway. As such, we can just do a simple memcpy() to copy over
- * the new name before we switch.
+/**
+ * dentry_update_name_case - update case insensitive dentry with a new name
+ * @dentry: dentry to be updated
+ * @name: new name
*
- * Note that we have to be a lot more careful about getting the hash
- * switched - we have to switch the hash value properly even if it
- * then no longer matches the actual (corrupted) string of the target.
- * The hash value has to match the hash queue that the dentry is on..
+ * Update a case insensitive dentry with new case of name.
+ *
+ * dentry must have been returned by d_lookup with name @name. Old and new
+ * name lengths must match (ie. no d_compare which allows mismatched name
+ * lengths).
+ *
+ * Parent inode i_mutex must be held over d_lookup and into this call (to
+ * keep renames and concurrent inserts, and readdir(2) away).
*/
+void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
+{
+ BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+ BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
+
+ spin_lock(&dentry->d_lock);
+ write_seqcount_begin(&dentry->d_seq);
+ memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
+ write_seqcount_end(&dentry->d_seq);
+ spin_unlock(&dentry->d_lock);
+}
+EXPORT_SYMBOL(dentry_update_name_case);
+
static void switch_names(struct dentry *dentry, struct dentry *target)
{
if (dname_external(target)) {
@@ -1643,54 +2144,84 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
swap(dentry->d_name.len, target->d_name.len);
}
+static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
+{
+ /*
+ * XXXX: do we really need to take target->d_lock?
+ */
+ if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
+ spin_lock(&target->d_parent->d_lock);
+ else {
+ if (d_ancestor(dentry->d_parent, target->d_parent)) {
+ spin_lock(&dentry->d_parent->d_lock);
+ spin_lock_nested(&target->d_parent->d_lock,
+ DENTRY_D_LOCK_NESTED);
+ } else {
+ spin_lock(&target->d_parent->d_lock);
+ spin_lock_nested(&dentry->d_parent->d_lock,
+ DENTRY_D_LOCK_NESTED);
+ }
+ }
+ if (target < dentry) {
+ spin_lock_nested(&target->d_lock, 2);
+ spin_lock_nested(&dentry->d_lock, 3);
+ } else {
+ spin_lock_nested(&dentry->d_lock, 2);
+ spin_lock_nested(&target->d_lock, 3);
+ }
+}
+
+static void dentry_unlock_parents_for_move(struct dentry *dentry,
+ struct dentry *target)
+{
+ if (target->d_parent != dentry->d_parent)
+ spin_unlock(&dentry->d_parent->d_lock);
+ if (target->d_parent != target)
+ spin_unlock(&target->d_parent->d_lock);
+}
+
/*
- * We cannibalize "target" when moving dentry on top of it,
- * because it's going to be thrown away anyway. We could be more
- * polite about it, though.
- *
- * This forceful removal will result in ugly /proc output if
- * somebody holds a file open that got deleted due to a rename.
- * We could be nicer about the deleted file, and let it show
- * up under the name it had before it was deleted rather than
- * under the original name of the file that was moved on top of it.
+ * When switching names, the actual string doesn't strictly have to
+ * be preserved in the target - because we're dropping the target
+ * anyway. As such, we can just do a simple memcpy() to copy over
+ * the new name before we switch.
+ *
+ * Note that we have to be a lot more careful about getting the hash
+ * switched - we have to switch the hash value properly even if it
+ * then no longer matches the actual (corrupted) string of the target.
+ * The hash value has to match the hash queue that the dentry is on..
*/
-
/*
- * d_move_locked - move a dentry
+ * d_move - move a dentry
* @dentry: entry to move
* @target: new dentry
*
* Update the dcache to reflect the move of a file name. Negative
* dcache entries should not be moved in this way.
*/
-static void d_move_locked(struct dentry * dentry, struct dentry * target)
+void d_move(struct dentry * dentry, struct dentry * target)
{
- struct hlist_head *list;
-
if (!dentry->d_inode)
printk(KERN_WARNING "VFS: moving negative dcache entry\n");
+ BUG_ON(d_ancestor(dentry, target));
+ BUG_ON(d_ancestor(target, dentry));
+
write_seqlock(&rename_lock);
- /*
- * XXXX: do we really need to take target->d_lock?
- */
- if (target < dentry) {
- spin_lock(&target->d_lock);
- spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
- } else {
- spin_lock(&dentry->d_lock);
- spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
- }
- /* Move the dentry to the target hash queue, if on different bucket */
- if (d_unhashed(dentry))
- goto already_unhashed;
+ dentry_lock_for_move(dentry, target);
- hlist_del_rcu(&dentry->d_hash);
+ write_seqcount_begin(&dentry->d_seq);
+ write_seqcount_begin(&target->d_seq);
-already_unhashed:
- list = d_hash(target->d_parent, target->d_name.hash);
- __d_rehash(dentry, list);
+ /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
+
+ /*
+ * Move the dentry to the target hash queue. Don't bother checking
+ * for the same hash queue because of how unlikely it is.
+ */
+ __d_drop(dentry);
+ __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
/* Unhash the target: dput() will then get rid of it */
__d_drop(target);
@@ -1715,27 +2246,16 @@ already_unhashed:
}
list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+
+ write_seqcount_end(&target->d_seq);
+ write_seqcount_end(&dentry->d_seq);
+
+ dentry_unlock_parents_for_move(dentry, target);
spin_unlock(&target->d_lock);
fsnotify_d_move(dentry);
spin_unlock(&dentry->d_lock);
write_sequnlock(&rename_lock);
}
-
-/**
- * d_move - move a dentry
- * @dentry: entry to move
- * @target: new dentry
- *
- * Update the dcache to reflect the move of a file name. Negative
- * dcache entries should not be moved in this way.
- */
-
-void d_move(struct dentry * dentry, struct dentry * target)
-{
- spin_lock(&dcache_lock);
- d_move_locked(dentry, target);
- spin_unlock(&dcache_lock);
-}
EXPORT_SYMBOL(d_move);
/**
@@ -1761,13 +2281,13 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
* This helper attempts to cope with remotely renamed directories
*
* It assumes that the caller is already holding
- * dentry->d_parent->d_inode->i_mutex and the dcache_lock
+ * dentry->d_parent->d_inode->i_mutex and the inode->i_lock
*
* Note: If ever the locking in lock_rename() changes, then please
* remember to update this too...
*/
-static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
- __releases(dcache_lock)
+static struct dentry *__d_unalias(struct inode *inode,
+ struct dentry *dentry, struct dentry *alias)
{
struct mutex *m1 = NULL, *m2 = NULL;
struct dentry *ret;
@@ -1790,10 +2310,10 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
goto out_err;
m2 = &alias->d_parent->d_inode->i_mutex;
out_unalias:
- d_move_locked(alias, dentry);
+ d_move(alias, dentry);
ret = alias;
out_err:
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
if (m2)
mutex_unlock(m2);
if (m1)
@@ -1804,17 +2324,23 @@ out_err:
/*
* Prepare an anonymous dentry for life in the superblock's dentry tree as a
* named dentry in place of the dentry to be replaced.
+ * returns with anon->d_lock held!
*/
static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
{
struct dentry *dparent, *aparent;
- switch_names(dentry, anon);
- swap(dentry->d_name.hash, anon->d_name.hash);
+ dentry_lock_for_move(anon, dentry);
+
+ write_seqcount_begin(&dentry->d_seq);
+ write_seqcount_begin(&anon->d_seq);
dparent = dentry->d_parent;
aparent = anon->d_parent;
+ switch_names(dentry, anon);
+ swap(dentry->d_name.hash, anon->d_name.hash);
+
dentry->d_parent = (aparent == anon) ? dentry : aparent;
list_del(&dentry->d_u.d_child);
if (!IS_ROOT(dentry))
@@ -1829,6 +2355,13 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
else
INIT_LIST_HEAD(&anon->d_u.d_child);
+ write_seqcount_end(&dentry->d_seq);
+ write_seqcount_end(&anon->d_seq);
+
+ dentry_unlock_parents_for_move(anon, dentry);
+ spin_unlock(&dentry->d_lock);
+
+ /* anon->d_lock still locked, returns locked */
anon->d_flags &= ~DCACHE_DISCONNECTED;
}
@@ -1846,14 +2379,15 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
BUG_ON(!d_unhashed(dentry));
- spin_lock(&dcache_lock);
-
if (!inode) {
actual = dentry;
__d_instantiate(dentry, NULL);
- goto found_lock;
+ d_rehash(actual);
+ goto out_nolock;
}
+ spin_lock(&inode->i_lock);
+
if (S_ISDIR(inode->i_mode)) {
struct dentry *alias;
@@ -1864,13 +2398,12 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
/* Is this an anonymous mountpoint that we could splice
* into our tree? */
if (IS_ROOT(alias)) {
- spin_lock(&alias->d_lock);
__d_materialise_dentry(dentry, alias);
__d_drop(alias);
goto found;
}
/* Nope, but we must(!) avoid directory aliasing */
- actual = __d_unalias(dentry, alias);
+ actual = __d_unalias(inode, dentry, alias);
if (IS_ERR(actual))
dput(alias);
goto out_nolock;
@@ -1881,15 +2414,14 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
actual = __d_instantiate_unique(dentry, inode);
if (!actual)
actual = dentry;
- else if (unlikely(!d_unhashed(actual)))
- goto shouldnt_be_hashed;
+ else
+ BUG_ON(!d_unhashed(actual));
-found_lock:
spin_lock(&actual->d_lock);
found:
_d_rehash(actual);
spin_unlock(&actual->d_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
out_nolock:
if (actual == dentry) {
security_d_instantiate(dentry, inode);
@@ -1898,10 +2430,6 @@ out_nolock:
iput(inode);
return actual;
-
-shouldnt_be_hashed:
- spin_unlock(&dcache_lock);
- BUG();
}
EXPORT_SYMBOL_GPL(d_materialise_unique);
@@ -1928,7 +2456,7 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
* @buffer: pointer to the end of the buffer
* @buflen: pointer to buffer length
*
- * Caller holds the dcache_lock.
+ * Caller holds the rename_lock.
*
* If path is not reachable from the supplied root, then the value of
* root is changed (without modifying refcounts).
@@ -1956,7 +2484,9 @@ static int prepend_path(const struct path *path, struct path *root,
}
parent = dentry->d_parent;
prefetch(parent);
+ spin_lock(&dentry->d_lock);
error = prepend_name(buffer, buflen, &dentry->d_name);
+ spin_unlock(&dentry->d_lock);
if (!error)
error = prepend(buffer, buflen, "/", 1);
if (error)
@@ -2012,9 +2542,9 @@ char *__d_path(const struct path *path, struct path *root,
int error;
prepend(&res, &buflen, "\0", 1);
- spin_lock(&dcache_lock);
+ write_seqlock(&rename_lock);
error = prepend_path(path, root, &res, &buflen);
- spin_unlock(&dcache_lock);
+ write_sequnlock(&rename_lock);
if (error)
return ERR_PTR(error);
@@ -2076,12 +2606,12 @@ char *d_path(const struct path *path, char *buf, int buflen)
return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
get_fs_root(current->fs, &root);
- spin_lock(&dcache_lock);
+ write_seqlock(&rename_lock);
tmp = root;
error = path_with_deleted(path, &tmp, &res, &buflen);
if (error)
res = ERR_PTR(error);
- spin_unlock(&dcache_lock);
+ write_sequnlock(&rename_lock);
path_put(&root);
return res;
}
@@ -2107,12 +2637,12 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
get_fs_root(current->fs, &root);
- spin_lock(&dcache_lock);
+ write_seqlock(&rename_lock);
tmp = root;
error = path_with_deleted(path, &tmp, &res, &buflen);
if (!error && !path_equal(&tmp, &root))
error = prepend_unreachable(&res, &buflen);
- spin_unlock(&dcache_lock);
+ write_sequnlock(&rename_lock);
path_put(&root);
if (error)
res = ERR_PTR(error);
@@ -2144,7 +2674,7 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
/*
* Write full pathname from the root of the filesystem into the buffer.
*/
-char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
+static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
{
char *end = buf + buflen;
char *retval;
@@ -2158,10 +2688,13 @@ char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
while (!IS_ROOT(dentry)) {
struct dentry *parent = dentry->d_parent;
+ int error;
prefetch(parent);
- if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) ||
- (prepend(&end, &buflen, "/", 1) != 0))
+ spin_lock(&dentry->d_lock);
+ error = prepend_name(&end, &buflen, &dentry->d_name);
+ spin_unlock(&dentry->d_lock);
+ if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
goto Elong;
retval = end;
@@ -2171,14 +2704,25 @@ char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
Elong:
return ERR_PTR(-ENAMETOOLONG);
}
-EXPORT_SYMBOL(__dentry_path);
+
+char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
+{
+ char *retval;
+
+ write_seqlock(&rename_lock);
+ retval = __dentry_path(dentry, buf, buflen);
+ write_sequnlock(&rename_lock);
+
+ return retval;
+}
+EXPORT_SYMBOL(dentry_path_raw);
char *dentry_path(struct dentry *dentry, char *buf, int buflen)
{
char *p = NULL;
char *retval;
- spin_lock(&dcache_lock);
+ write_seqlock(&rename_lock);
if (d_unlinked(dentry)) {
p = buf + buflen;
if (prepend(&p, &buflen, "//deleted", 10) != 0)
@@ -2186,12 +2730,11 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
buflen++;
}
retval = __dentry_path(dentry, buf, buflen);
- spin_unlock(&dcache_lock);
+ write_sequnlock(&rename_lock);
if (!IS_ERR(retval) && p)
*p = '/'; /* restore '/' overriden with '\0' */
return retval;
Elong:
- spin_unlock(&dcache_lock);
return ERR_PTR(-ENAMETOOLONG);
}
@@ -2225,7 +2768,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
get_fs_root_and_pwd(current->fs, &root, &pwd);
error = -ENOENT;
- spin_lock(&dcache_lock);
+ write_seqlock(&rename_lock);
if (!d_unlinked(pwd.dentry)) {
unsigned long len;
struct path tmp = root;
@@ -2234,7 +2777,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
prepend(&cwd, &buflen, "\0", 1);
error = prepend_path(&pwd, &tmp, &cwd, &buflen);
- spin_unlock(&dcache_lock);
+ write_sequnlock(&rename_lock);
if (error)
goto out;
@@ -2253,8 +2796,9 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
if (copy_to_user(buf, cwd, len))
error = -EFAULT;
}
- } else
- spin_unlock(&dcache_lock);
+ } else {
+ write_sequnlock(&rename_lock);
+ }
out:
path_put(&pwd);
@@ -2282,25 +2826,25 @@ out:
int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
{
int result;
- unsigned long seq;
+ unsigned seq;
if (new_dentry == old_dentry)
return 1;
- /*
- * Need rcu_readlock to protect against the d_parent trashing
- * due to d_move
- */
- rcu_read_lock();
do {
/* for restarting inner loop in case of seq retry */
seq = read_seqbegin(&rename_lock);
+ /*
+ * Need rcu_readlock to protect against the d_parent trashing
+ * due to d_move
+ */
+ rcu_read_lock();
if (d_ancestor(old_dentry, new_dentry))
result = 1;
else
result = 0;
+ rcu_read_unlock();
} while (read_seqretry(&rename_lock, seq));
- rcu_read_unlock();
return result;
}
@@ -2332,10 +2876,15 @@ EXPORT_SYMBOL(path_is_under);
void d_genocide(struct dentry *root)
{
- struct dentry *this_parent = root;
+ struct dentry *this_parent;
struct list_head *next;
+ unsigned seq;
+ int locked = 0;
- spin_lock(&dcache_lock);
+ seq = read_seqbegin(&rename_lock);
+again:
+ this_parent = root;
+ spin_lock(&this_parent->d_lock);
repeat:
next = this_parent->d_subdirs.next;
resume:
@@ -2343,21 +2892,62 @@ resume:
struct list_head *tmp = next;
struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
- if (d_unhashed(dentry)||!dentry->d_inode)
+
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+ if (d_unhashed(dentry) || !dentry->d_inode) {
+ spin_unlock(&dentry->d_lock);
continue;
+ }
if (!list_empty(&dentry->d_subdirs)) {
+ spin_unlock(&this_parent->d_lock);
+ spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
this_parent = dentry;
+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
goto repeat;
}
- atomic_dec(&dentry->d_count);
+ if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
+ dentry->d_flags |= DCACHE_GENOCIDE;
+ dentry->d_count--;
+ }
+ spin_unlock(&dentry->d_lock);
}
if (this_parent != root) {
- next = this_parent->d_u.d_child.next;
- atomic_dec(&this_parent->d_count);
- this_parent = this_parent->d_parent;
+ struct dentry *tmp;
+ struct dentry *child;
+
+ tmp = this_parent->d_parent;
+ if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
+ this_parent->d_flags |= DCACHE_GENOCIDE;
+ this_parent->d_count--;
+ }
+ rcu_read_lock();
+ spin_unlock(&this_parent->d_lock);
+ child = this_parent;
+ this_parent = tmp;
+ spin_lock(&this_parent->d_lock);
+ /* might go back up the wrong parent if we have had a rename
+ * or deletion */
+ if (this_parent != child->d_parent ||
+ (!locked && read_seqretry(&rename_lock, seq))) {
+ spin_unlock(&this_parent->d_lock);
+ rcu_read_unlock();
+ goto rename_retry;
+ }
+ rcu_read_unlock();
+ next = child->d_u.d_child.next;
goto resume;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&this_parent->d_lock);
+ if (!locked && read_seqretry(&rename_lock, seq))
+ goto rename_retry;
+ if (locked)
+ write_sequnlock(&rename_lock);
+ return;
+
+rename_retry:
+ locked = 1;
+ write_seqlock(&rename_lock);
+ goto again;
}
/**
@@ -2411,7 +3001,7 @@ static void __init dcache_init_early(void)
dentry_hashtable =
alloc_large_system_hash("Dentry cache",
- sizeof(struct hlist_head),
+ sizeof(struct dcache_hash_bucket),
dhash_entries,
13,
HASH_EARLY,
@@ -2420,16 +3010,13 @@ static void __init dcache_init_early(void)
0);
for (loop = 0; loop < (1 << d_hash_shift); loop++)
- INIT_HLIST_HEAD(&dentry_hashtable[loop]);
+ INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
}
static void __init dcache_init(void)
{
int loop;
- percpu_counter_init(&nr_dentry, 0);
- percpu_counter_init(&nr_dentry_unused, 0);
-
/*
* A constructor could be added for stable state like the lists,
* but it is probably not worth it because of the cache nature
@@ -2446,7 +3033,7 @@ static void __init dcache_init(void)
dentry_hashtable =
alloc_large_system_hash("Dentry cache",
- sizeof(struct hlist_head),
+ sizeof(struct dcache_hash_bucket),
dhash_entries,
13,
0,
@@ -2455,7 +3042,7 @@ static void __init dcache_init(void)
0);
for (loop = 0; loop < (1 << d_hash_shift); loop++)
- INIT_HLIST_HEAD(&dentry_hashtable[loop]);
+ INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
}
/* SLAB cache for __getname() consumers */
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index a4ed838..37a8ca7 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -135,17 +135,17 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent)
return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
}
-static int debug_get_sb(struct file_system_type *fs_type,
+static struct dentry *debug_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
- return get_sb_single(fs_type, flags, data, debug_fill_super, mnt);
+ return mount_single(fs_type, flags, data, debug_fill_super);
}
static struct file_system_type debug_fs_type = {
.owner = THIS_MODULE,
.name = "debugfs",
- .get_sb = debug_get_sb,
+ .mount = debug_mount,
.kill_sb = kill_litter_super,
};
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 8b3ffd5..1bb547c 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -331,7 +331,7 @@ static int compare_init_pts_sb(struct super_block *s, void *p)
}
/*
- * devpts_get_sb()
+ * devpts_mount()
*
* If the '-o newinstance' mount option was specified, mount a new
* (private) instance of devpts. PTYs created in this instance are
@@ -345,20 +345,20 @@ static int compare_init_pts_sb(struct super_block *s, void *p)
* semantics in devpts while preserving backward compatibility of the
* current 'single-namespace' semantics. i.e all mounts of devpts
* without the 'newinstance' mount option should bind to the initial
- * kernel mount, like get_sb_single().
+ * kernel mount, like mount_single().
*
* Mounts with 'newinstance' option create a new, private namespace.
*
* NOTE:
*
- * For single-mount semantics, devpts cannot use get_sb_single(),
- * because get_sb_single()/sget() find and use the super-block from
+ * For single-mount semantics, devpts cannot use mount_single(),
+ * because mount_single()/sget() find and use the super-block from
* the most recent mount of devpts. But that recent mount may be a
- * 'newinstance' mount and get_sb_single() would pick the newinstance
+ * 'newinstance' mount and mount_single() would pick the newinstance
* super-block instead of the initial super-block.
*/
-static int devpts_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *devpts_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
int error;
struct pts_mount_opts opts;
@@ -366,7 +366,7 @@ static int devpts_get_sb(struct file_system_type *fs_type,
error = parse_mount_options(data, PARSE_MOUNT, &opts);
if (error)
- return error;
+ return ERR_PTR(error);
if (opts.newinstance)
s = sget(fs_type, NULL, set_anon_super, NULL);
@@ -374,7 +374,7 @@ static int devpts_get_sb(struct file_system_type *fs_type,
s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL);
if (IS_ERR(s))
- return PTR_ERR(s);
+ return ERR_CAST(s);
if (!s->s_root) {
s->s_flags = flags;
@@ -390,13 +390,11 @@ static int devpts_get_sb(struct file_system_type *fs_type,
if (error)
goto out_undo_sget;
- simple_set_mnt(mnt, s);
-
- return 0;
+ return dget(s->s_root);
out_undo_sget:
deactivate_locked_super(s);
- return error;
+ return ERR_PTR(error);
}
#else
@@ -404,10 +402,10 @@ out_undo_sget:
* This supports only the legacy single-instance semantics (no
* multiple-instance semantics)
*/
-static int devpts_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
{
- return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt);
+ return mount_single(fs_type, flags, data, devpts_fill_super);
}
#endif
@@ -421,7 +419,7 @@ static void devpts_kill_sb(struct super_block *sb)
static struct file_system_type devpts_fs_type = {
.name = "devpts",
- .get_sb = devpts_get_sb,
+ .mount = devpts_mount,
.kill_sb = devpts_kill_sb,
};
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 37a34c2..9c64ae9 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -63,6 +63,9 @@
#define NEEDED_RMEM (4*1024*1024)
#define CONN_HASH_SIZE 32
+/* Number of messages to send before rescheduling */
+#define MAX_SEND_MSG_COUNT 25
+
struct cbuf {
unsigned int base;
unsigned int len;
@@ -108,6 +111,7 @@ struct connection {
#define CF_INIT_PENDING 4
#define CF_IS_OTHERCON 5
#define CF_CLOSE 6
+#define CF_APP_LIMITED 7
struct list_head writequeue; /* List of outgoing writequeue_entries */
spinlock_t writequeue_lock;
int (*rx_action) (struct connection *); /* What to do when active */
@@ -295,7 +299,17 @@ static void lowcomms_write_space(struct sock *sk)
{
struct connection *con = sock2con(sk);
- if (con && !test_and_set_bit(CF_WRITE_PENDING, &con->flags))
+ if (!con)
+ return;
+
+ clear_bit(SOCK_NOSPACE, &con->sock->flags);
+
+ if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) {
+ con->sock->sk->sk_write_pending--;
+ clear_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags);
+ }
+
+ if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
queue_work(send_workqueue, &con->swork);
}
@@ -915,6 +929,7 @@ static void tcp_connect_to_sock(struct connection *con)
struct sockaddr_storage saddr, src_addr;
int addr_len;
struct socket *sock = NULL;
+ int one = 1;
if (con->nodeid == 0) {
log_print("attempt to connect sock 0 foiled");
@@ -960,6 +975,11 @@ static void tcp_connect_to_sock(struct connection *con)
make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len);
log_print("connecting to %d", con->nodeid);
+
+ /* Turn off Nagle's algorithm */
+ kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
+ sizeof(one));
+
result =
sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
O_NONBLOCK);
@@ -1011,6 +1031,10 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
goto create_out;
}
+ /* Turn off Nagle's algorithm */
+ kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
+ sizeof(one));
+
result = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
(char *)&one, sizeof(one));
@@ -1297,6 +1321,7 @@ static void send_to_sock(struct connection *con)
const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
struct writequeue_entry *e;
int len, offset;
+ int count = 0;
mutex_lock(&con->sock_mutex);
if (con->sock == NULL)
@@ -1319,14 +1344,27 @@ static void send_to_sock(struct connection *con)
ret = kernel_sendpage(con->sock, e->page, offset, len,
msg_flags);
if (ret == -EAGAIN || ret == 0) {
+ if (ret == -EAGAIN &&
+ test_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags) &&
+ !test_and_set_bit(CF_APP_LIMITED, &con->flags)) {
+ /* Notify TCP that we're limited by the
+ * application window size.
+ */
+ set_bit(SOCK_NOSPACE, &con->sock->flags);
+ con->sock->sk->sk_write_pending++;
+ }
cond_resched();
goto out;
}
if (ret <= 0)
goto send_error;
}
- /* Don't starve people filling buffers */
+
+ /* Don't starve people filling buffers */
+ if (++count >= MAX_SEND_MSG_COUNT) {
cond_resched();
+ count = 0;
+ }
spin_lock(&con->writequeue_lock);
e->offset += ret;
@@ -1430,20 +1468,19 @@ static void work_stop(void)
static int work_start(void)
{
- int error;
- recv_workqueue = create_workqueue("dlm_recv");
- error = IS_ERR(recv_workqueue);
- if (error) {
- log_print("can't start dlm_recv %d", error);
- return error;
+ recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM |
+ WQ_HIGHPRI | WQ_FREEZEABLE, 0);
+ if (!recv_workqueue) {
+ log_print("can't start dlm_recv");
+ return -ENOMEM;
}
- send_workqueue = create_singlethread_workqueue("dlm_send");
- error = IS_ERR(send_workqueue);
- if (error) {
- log_print("can't start dlm_send %d", error);
+ send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM |
+ WQ_HIGHPRI | WQ_FREEZEABLE, 0);
+ if (!send_workqueue) {
+ log_print("can't start dlm_send");
destroy_workqueue(recv_workqueue);
- return error;
+ return -ENOMEM;
}
return 0;
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 906e803..6fc4f31 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -44,12 +44,17 @@
*/
static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
- struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
+ struct dentry *lower_dentry;
+ struct vfsmount *lower_mnt;
struct dentry *dentry_save;
struct vfsmount *vfsmount_save;
int rc = 1;
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
goto out;
dentry_save = nd->path.dentry;
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 40186b9..413a3c4 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -377,6 +377,7 @@ struct ecryptfs_mount_crypt_stat {
#define ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES 0x00000010
#define ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK 0x00000020
#define ECRYPTFS_GLOBAL_ENCFN_USE_FEK 0x00000040
+#define ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY 0x00000080
u32 flags;
struct list_head global_auth_tok_list;
struct mutex global_auth_tok_list_mutex;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 3fbc942..337352a 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -32,6 +32,7 @@
#include <linux/crypto.h>
#include <linux/fs_stack.h>
#include <linux/slab.h>
+#include <linux/xattr.h>
#include <asm/unaligned.h>
#include "ecryptfs_kernel.h"
@@ -70,15 +71,19 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
struct dentry *dentry_save;
struct vfsmount *vfsmount_save;
+ unsigned int flags_save;
int rc;
dentry_save = nd->path.dentry;
vfsmount_save = nd->path.mnt;
+ flags_save = nd->flags;
nd->path.dentry = lower_dentry;
nd->path.mnt = lower_mnt;
+ nd->flags &= ~LOOKUP_OPEN;
rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
nd->path.dentry = dentry_save;
nd->path.mnt = vfsmount_save;
+ nd->flags = flags_save;
return rc;
}
@@ -255,7 +260,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
ecryptfs_dentry->d_parent));
lower_inode = lower_dentry->d_inode;
fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode);
- BUG_ON(!atomic_read(&lower_dentry->d_count));
+ BUG_ON(!lower_dentry->d_count);
ecryptfs_set_dentry_private(ecryptfs_dentry,
kmem_cache_alloc(ecryptfs_dentry_info_cache,
GFP_KERNEL));
@@ -436,7 +441,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
struct qstr lower_name;
int rc = 0;
- ecryptfs_dentry->d_op = &ecryptfs_dops;
+ d_set_d_op(ecryptfs_dentry, &ecryptfs_dops);
if ((ecryptfs_dentry->d_name.len == 1
&& !strcmp(ecryptfs_dentry->d_name.name, "."))
|| (ecryptfs_dentry->d_name.len == 2
@@ -449,7 +454,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
lower_name.hash = ecryptfs_dentry->d_name.hash;
if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
- &lower_name);
+ lower_dir_dentry->d_inode, &lower_name);
if (rc < 0)
goto out_d_drop;
}
@@ -484,7 +489,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
lower_name.hash = full_name_hash(lower_name.name, lower_name.len);
if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
- &lower_name);
+ lower_dir_dentry->d_inode, &lower_name);
if (rc < 0)
goto out_d_drop;
}
@@ -975,8 +980,10 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
}
static int
-ecryptfs_permission(struct inode *inode, int mask)
+ecryptfs_permission(struct inode *inode, int mask, unsigned int flags)
{
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
return inode_permission(ecryptfs_inode_to_lower(inode), mask);
}
@@ -1108,10 +1115,8 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
rc = -EOPNOTSUPP;
goto out;
}
- mutex_lock(&lower_dentry->d_inode->i_mutex);
- rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry, name, value,
- size, flags);
- mutex_unlock(&lower_dentry->d_inode->i_mutex);
+
+ rc = vfs_setxattr(lower_dentry, name, value, size, flags);
out:
return rc;
}
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 73811cf..b1f6858 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -446,6 +446,7 @@ out:
*/
static int
ecryptfs_find_auth_tok_for_sig(
+ struct key **auth_tok_key,
struct ecryptfs_auth_tok **auth_tok,
struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
char *sig)
@@ -453,12 +454,21 @@ ecryptfs_find_auth_tok_for_sig(
struct ecryptfs_global_auth_tok *global_auth_tok;
int rc = 0;
+ (*auth_tok_key) = NULL;
(*auth_tok) = NULL;
if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok,
mount_crypt_stat, sig)) {
- struct key *auth_tok_key;
- rc = ecryptfs_keyring_auth_tok_for_sig(&auth_tok_key, auth_tok,
+ /* if the flag ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY is set in the
+ * mount_crypt_stat structure, we prevent to use auth toks that
+ * are not inserted through the ecryptfs_add_global_auth_tok
+ * function.
+ */
+ if (mount_crypt_stat->flags
+ & ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY)
+ return -EINVAL;
+
+ rc = ecryptfs_keyring_auth_tok_for_sig(auth_tok_key, auth_tok,
sig);
} else
(*auth_tok) = global_auth_tok->global_auth_tok;
@@ -509,6 +519,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
char *filename, size_t filename_size)
{
struct ecryptfs_write_tag_70_packet_silly_stack *s;
+ struct key *auth_tok_key = NULL;
int rc = 0;
s = kmalloc(sizeof(*s), GFP_KERNEL);
@@ -606,6 +617,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
}
dest[s->i++] = s->cipher_code;
rc = ecryptfs_find_auth_tok_for_sig(
+ &auth_tok_key,
&s->auth_tok, mount_crypt_stat,
mount_crypt_stat->global_default_fnek_sig);
if (rc) {
@@ -753,6 +765,8 @@ out_free_unlock:
out_unlock:
mutex_unlock(s->tfm_mutex);
out:
+ if (auth_tok_key)
+ key_put(auth_tok_key);
kfree(s);
return rc;
}
@@ -798,6 +812,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
char *data, size_t max_packet_size)
{
struct ecryptfs_parse_tag_70_packet_silly_stack *s;
+ struct key *auth_tok_key = NULL;
int rc = 0;
(*packet_size) = 0;
@@ -910,7 +925,8 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
* >= ECRYPTFS_MAX_IV_BYTES. */
memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES);
s->desc.info = s->iv;
- rc = ecryptfs_find_auth_tok_for_sig(&s->auth_tok, mount_crypt_stat,
+ rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
+ &s->auth_tok, mount_crypt_stat,
s->fnek_sig_hex);
if (rc) {
printk(KERN_ERR "%s: Error attempting to find auth tok for "
@@ -986,6 +1002,8 @@ out:
(*filename_size) = 0;
(*filename) = NULL;
}
+ if (auth_tok_key)
+ key_put(auth_tok_key);
kfree(s);
return rc;
}
@@ -1557,14 +1575,19 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
ECRYPTFS_VERSION_MAJOR,
ECRYPTFS_VERSION_MINOR);
rc = -EINVAL;
- goto out;
+ goto out_release_key;
}
if ((*auth_tok)->token_type != ECRYPTFS_PASSWORD
&& (*auth_tok)->token_type != ECRYPTFS_PRIVATE_KEY) {
printk(KERN_ERR "Invalid auth_tok structure "
"returned from key query\n");
rc = -EINVAL;
- goto out;
+ goto out_release_key;
+ }
+out_release_key:
+ if (rc) {
+ key_put(*auth_tok_key);
+ (*auth_tok_key) = NULL;
}
out:
return rc;
@@ -1688,6 +1711,7 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
size_t tag_11_contents_size;
size_t tag_11_packet_size;
+ struct key *auth_tok_key = NULL;
int rc = 0;
INIT_LIST_HEAD(&auth_tok_list);
@@ -1784,6 +1808,10 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
* just one will be sufficient to decrypt to get the FEK. */
find_next_matching_auth_tok:
found_auth_tok = 0;
+ if (auth_tok_key) {
+ key_put(auth_tok_key);
+ auth_tok_key = NULL;
+ }
list_for_each_entry(auth_tok_list_item, &auth_tok_list, list) {
candidate_auth_tok = &auth_tok_list_item->auth_tok;
if (unlikely(ecryptfs_verbosity > 0)) {
@@ -1800,10 +1828,11 @@ find_next_matching_auth_tok:
rc = -EINVAL;
goto out_wipe_list;
}
- ecryptfs_find_auth_tok_for_sig(&matching_auth_tok,
+ rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
+ &matching_auth_tok,
crypt_stat->mount_crypt_stat,
candidate_auth_tok_sig);
- if (matching_auth_tok) {
+ if (!rc) {
found_auth_tok = 1;
goto found_matching_auth_tok;
}
@@ -1866,6 +1895,8 @@ found_matching_auth_tok:
out_wipe_list:
wipe_auth_tok_list(&auth_tok_list);
out:
+ if (auth_tok_key)
+ key_put(auth_tok_key);
return rc;
}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index cbd4e18..3510386 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
if (special_file(lower_inode->i_mode))
init_special_inode(inode, lower_inode->i_mode,
lower_inode->i_rdev);
- dentry->d_op = &ecryptfs_dops;
+ d_set_d_op(dentry, &ecryptfs_dops);
fsstack_copy_attr_all(inode, lower_inode);
/* This size will be overwritten for real files w/ headers and
* other metadata */
@@ -208,7 +208,8 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata,
ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig,
ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes,
- ecryptfs_opt_unlink_sigs, ecryptfs_opt_err };
+ ecryptfs_opt_unlink_sigs, ecryptfs_opt_mount_auth_tok_only,
+ ecryptfs_opt_err };
static const match_table_t tokens = {
{ecryptfs_opt_sig, "sig=%s"},
@@ -223,6 +224,7 @@ static const match_table_t tokens = {
{ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"},
{ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"},
{ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"},
+ {ecryptfs_opt_mount_auth_tok_only, "ecryptfs_mount_auth_tok_only"},
{ecryptfs_opt_err, NULL}
};
@@ -406,6 +408,10 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
case ecryptfs_opt_unlink_sigs:
mount_crypt_stat->flags |= ECRYPTFS_UNLINK_SIGS;
break;
+ case ecryptfs_opt_mount_auth_tok_only:
+ mount_crypt_stat->flags |=
+ ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY;
+ break;
case ecryptfs_opt_err:
default:
printk(KERN_WARNING
@@ -540,9 +546,8 @@ out:
* ecryptfs_interpose to perform most of the linking
* ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c)
*/
-static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *raw_data,
- struct vfsmount *mnt)
+static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *raw_data)
{
struct super_block *s;
struct ecryptfs_sb_info *sbi;
@@ -589,7 +594,7 @@ static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
deactivate_locked_super(s);
goto out;
}
- s->s_root->d_op = &ecryptfs_dops;
+ d_set_d_op(s->s_root, &ecryptfs_dops);
s->s_root->d_sb = s;
s->s_root->d_parent = s->s_root;
@@ -607,8 +612,7 @@ static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
err = "Reading sb failed";
goto out;
}
- simple_set_mnt(mnt, s);
- return 0;
+ return dget(s->s_root);
out:
if (sbi) {
@@ -616,7 +620,7 @@ out:
kmem_cache_free(ecryptfs_sb_info_cache, sbi);
}
printk(KERN_ERR "%s; rc = [%d]\n", err, rc);
- return rc;
+ return ERR_PTR(rc);
}
/**
@@ -639,7 +643,7 @@ static void ecryptfs_kill_block_super(struct super_block *sb)
static struct file_system_type ecryptfs_fs_type = {
.owner = THIS_MODULE,
.name = "ecryptfs",
- .get_sb = ecryptfs_get_sb,
+ .mount = ecryptfs_mount,
.kill_sb = ecryptfs_kill_block_super,
.fs_flags = 0
};
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index f7fc286..3042fe1 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -28,7 +28,6 @@
#include <linux/key.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
-#include <linux/smp_lock.h>
#include <linux/file.h>
#include <linux/crypto.h>
#include "ecryptfs_kernel.h"
@@ -63,6 +62,16 @@ out:
return inode;
}
+static void ecryptfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct ecryptfs_inode_info *inode_info;
+ inode_info = ecryptfs_inode_to_private(inode);
+
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
+}
+
/**
* ecryptfs_destroy_inode
* @inode: The ecryptfs inode
@@ -89,7 +98,7 @@ static void ecryptfs_destroy_inode(struct inode *inode)
}
}
ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
- kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
+ call_rcu(&inode->i_rcu, ecryptfs_i_callback);
}
/**
@@ -180,6 +189,8 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
seq_printf(m, ",ecryptfs_encrypted_view");
if (mount_crypt_stat->flags & ECRYPTFS_UNLINK_SIGS)
seq_printf(m, ",ecryptfs_unlink_sigs");
+ if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY)
+ seq_printf(m, ",ecryptfs_mount_auth_tok_only");
return 0;
}
diff --git a/fs/efs/super.c b/fs/efs/super.c
index f049428..0f31acb 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -20,16 +20,16 @@
static int efs_statfs(struct dentry *dentry, struct kstatfs *buf);
static int efs_fill_super(struct super_block *s, void *d, int silent);
-static int efs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *efs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, efs_fill_super, mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, efs_fill_super);
}
static struct file_system_type efs_fs_type = {
.owner = THIS_MODULE,
.name = "efs",
- .get_sb = efs_get_sb,
+ .mount = efs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
@@ -65,11 +65,18 @@ static struct inode *efs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void efs_destroy_inode(struct inode *inode)
+static void efs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
}
+static void efs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, efs_i_callback);
+}
+
static void init_once(void *foo)
{
struct efs_inode_info *ei = (struct efs_inode_info *) foo;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 8cf0724..cc8a9b7 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -217,7 +217,7 @@ struct ep_send_events_data {
* Configuration options available inside /proc/sys/fs/epoll/
*/
/* Maximum number of epoll watched descriptors, per user */
-static int max_user_watches __read_mostly;
+static long max_user_watches __read_mostly;
/*
* This mutex is used to serialize ep_free() and eventpoll_release_file().
@@ -240,16 +240,18 @@ static struct kmem_cache *pwq_cache __read_mostly;
#include <linux/sysctl.h>
-static int zero;
+static long zero;
+static long long_max = LONG_MAX;
ctl_table epoll_table[] = {
{
.procname = "max_user_watches",
.data = &max_user_watches,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(max_user_watches),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &zero,
+ .extra2 = &long_max,
},
{ }
};
@@ -561,7 +563,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
/* At this point it is safe to free the eventpoll item */
kmem_cache_free(epi_cache, epi);
- atomic_dec(&ep->user->epoll_watches);
+ atomic_long_dec(&ep->user->epoll_watches);
return 0;
}
@@ -898,11 +900,12 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
{
int error, revents, pwake = 0;
unsigned long flags;
+ long user_watches;
struct epitem *epi;
struct ep_pqueue epq;
- if (unlikely(atomic_read(&ep->user->epoll_watches) >=
- max_user_watches))
+ user_watches = atomic_long_read(&ep->user->epoll_watches);
+ if (unlikely(user_watches >= max_user_watches))
return -ENOSPC;
if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL)))
return -ENOMEM;
@@ -966,7 +969,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
spin_unlock_irqrestore(&ep->lock, flags);
- atomic_inc(&ep->user->epoll_watches);
+ atomic_long_inc(&ep->user->epoll_watches);
/* We have to call this outside the lock */
if (pwake)
@@ -1426,6 +1429,7 @@ static int __init eventpoll_init(void)
*/
max_user_watches = (((si.totalram - si.totalhigh) / 25) << PAGE_SHIFT) /
EP_ITEM_COST;
+ BUG_ON(max_user_watches < 0);
/* Initialize the structure used to perform safe poll wait head wake ups */
ep_nested_calls_init(&poll_safewake_ncalls);
diff --git a/fs/exec.c b/fs/exec.c
index 99d33a1..c62efcb 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -164,7 +164,26 @@ out:
#ifdef CONFIG_MMU
-static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+{
+ struct mm_struct *mm = current->mm;
+ long diff = (long)(pages - bprm->vma_pages);
+
+ if (!mm || !diff)
+ return;
+
+ bprm->vma_pages = pages;
+
+#ifdef SPLIT_RSS_COUNTING
+ add_mm_counter(mm, MM_ANONPAGES, diff);
+#else
+ spin_lock(&mm->page_table_lock);
+ add_mm_counter(mm, MM_ANONPAGES, diff);
+ spin_unlock(&mm->page_table_lock);
+#endif
+}
+
+struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
int write)
{
struct page *page;
@@ -186,6 +205,8 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
struct rlimit *rlim;
+ acct_arg_size(bprm, size / PAGE_SIZE);
+
/*
* We've historically supported up to 32 pages (ARG_MAX)
* of argument strings even with small stacks
@@ -254,6 +275,11 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
INIT_LIST_HEAD(&vma->anon_vma_chain);
+
+ err = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1);
+ if (err)
+ goto err;
+
err = insert_vm_struct(mm, vma);
if (err)
goto err;
@@ -276,7 +302,11 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
#else
-static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+{
+}
+
+struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
int write)
{
struct page *page;
@@ -1003,6 +1033,7 @@ int flush_old_exec(struct linux_binprm * bprm)
/*
* Release all of the old mmap stuff
*/
+ acct_arg_size(bprm, 0);
retval = exec_mmap(bprm->mm);
if (retval)
goto out;
@@ -1426,8 +1457,10 @@ int do_execve(const char * filename,
return retval;
out:
- if (bprm->mm)
- mmput (bprm->mm);
+ if (bprm->mm) {
+ acct_arg_size(bprm, 0);
+ mmput(bprm->mm);
+ }
out_file:
if (bprm->file) {
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 047e92f..8c6c466 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -150,12 +150,19 @@ static struct inode *exofs_alloc_inode(struct super_block *sb)
return &oi->vfs_inode;
}
+static void exofs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
+}
+
/*
* Remove an inode from the cache
*/
static void exofs_destroy_inode(struct inode *inode)
{
- kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
+ call_rcu(&inode->i_rcu, exofs_i_callback);
}
/*
@@ -659,19 +666,19 @@ free_bdi:
/*
* Set up the superblock (calls exofs_fill_super eventually)
*/
-static int exofs_get_sb(struct file_system_type *type,
+static struct dentry *exofs_mount(struct file_system_type *type,
int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
struct exofs_mountopt opts;
int ret;
ret = parse_options(data, &opts);
if (ret)
- return ret;
+ return ERR_PTR(ret);
opts.dev_name = dev_name;
- return get_sb_nodev(type, flags, &opts, exofs_fill_super, mnt);
+ return mount_nodev(type, flags, &opts, exofs_fill_super);
}
/*
@@ -809,7 +816,7 @@ static const struct export_operations exofs_export_ops = {
static struct file_system_type exofs_type = {
.owner = THIS_MODULE,
.name = "exofs",
- .get_sb = exofs_get_sb,
+ .mount = exofs_mount,
.kill_sb = generic_shutdown_super,
};
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 51b3040..4b68257 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -43,24 +43,26 @@ find_acceptable_alias(struct dentry *result,
void *context)
{
struct dentry *dentry, *toput = NULL;
+ struct inode *inode;
if (acceptable(context, result))
return result;
- spin_lock(&dcache_lock);
- list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) {
- dget_locked(dentry);
- spin_unlock(&dcache_lock);
+ inode = result->d_inode;
+ spin_lock(&inode->i_lock);
+ list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+ dget(dentry);
+ spin_unlock(&inode->i_lock);
if (toput)
dput(toput);
if (dentry != result && acceptable(context, dentry)) {
dput(result);
return dentry;
}
- spin_lock(&dcache_lock);
+ spin_lock(&inode->i_lock);
toput = dentry;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
if (toput)
dput(toput);
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 2bcc043..7b41805 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -232,10 +232,17 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
}
int
-ext2_check_acl(struct inode *inode, int mask)
+ext2_check_acl(struct inode *inode, int mask, unsigned int flags)
{
- struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
+ struct posix_acl *acl;
+
+ if (flags & IPERM_FLAG_RCU) {
+ if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+ return -ECHILD;
+ return -EAGAIN;
+ }
+ acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl) {
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index 3ff6cbb..c939b7b 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -54,7 +54,7 @@ static inline int ext2_acl_count(size_t size)
#ifdef CONFIG_EXT2_FS_POSIX_ACL
/* acl.c */
-extern int ext2_check_acl (struct inode *, int);
+extern int ext2_check_acl (struct inode *, int, unsigned int);
extern int ext2_acl_chmod (struct inode *);
extern int ext2_init_acl (struct inode *, struct inode *);
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 2709b34..47cda41 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -28,21 +28,30 @@
typedef struct ext2_dir_entry_2 ext2_dirent;
+/*
+ * Tests against MAX_REC_LEN etc were put in place for 64k block
+ * sizes; if that is not possible on this arch, we can skip
+ * those tests and speed things up.
+ */
static inline unsigned ext2_rec_len_from_disk(__le16 dlen)
{
unsigned len = le16_to_cpu(dlen);
+#if (PAGE_CACHE_SIZE >= 65536)
if (len == EXT2_MAX_REC_LEN)
return 1 << 16;
+#endif
return len;
}
static inline __le16 ext2_rec_len_to_disk(unsigned len)
{
+#if (PAGE_CACHE_SIZE >= 65536)
if (len == (1 << 16))
return cpu_to_le16(EXT2_MAX_REC_LEN);
else
BUG_ON(len > (1 << 16));
+#endif
return cpu_to_le16(len);
}
@@ -129,15 +138,15 @@ static void ext2_check_page(struct page *page, int quiet)
p = (ext2_dirent *)(kaddr + offs);
rec_len = ext2_rec_len_from_disk(p->rec_len);
- if (rec_len < EXT2_DIR_REC_LEN(1))
+ if (unlikely(rec_len < EXT2_DIR_REC_LEN(1)))
goto Eshort;
- if (rec_len & 3)
+ if (unlikely(rec_len & 3))
goto Ealign;
- if (rec_len < EXT2_DIR_REC_LEN(p->name_len))
+ if (unlikely(rec_len < EXT2_DIR_REC_LEN(p->name_len)))
goto Enamelen;
- if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
+ if (unlikely(((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)))
goto Espan;
- if (le32_to_cpu(p->inode) > max_inumber)
+ if (unlikely(le32_to_cpu(p->inode) > max_inumber))
goto Einumber;
}
if (offs != limit)
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index f8aecd2..2e1d834 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -67,7 +67,7 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
inode = NULL;
if (ino) {
inode = ext2_iget(dir->i_sb, ino);
- if (unlikely(IS_ERR(inode))) {
+ if (IS_ERR(inode)) {
if (PTR_ERR(inode) == -ESTALE) {
ext2_error(dir->i_sb, __func__,
"deleted inode referenced: %lu",
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 09013206..7731695 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -43,9 +43,10 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data);
static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
static int ext2_sync_fs(struct super_block *sb, int wait);
-void ext2_error (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext2_error(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
struct ext2_sb_info *sbi = EXT2_SB(sb);
struct ext2_super_block *es = sbi->s_es;
@@ -59,9 +60,13 @@ void ext2_error (struct super_block * sb, const char * function,
}
va_start(args, fmt);
- printk(KERN_CRIT "EXT2-fs (%s): error: %s: ", sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_CRIT "EXT2-fs (%s): error: %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
if (test_opt(sb, ERRORS_PANIC))
@@ -76,12 +81,16 @@ void ext2_error (struct super_block * sb, const char * function,
void ext2_msg(struct super_block *sb, const char *prefix,
const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk("%sEXT2-fs (%s): ", prefix, sb->s_id);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk("%sEXT2-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
+
va_end(args);
}
@@ -161,11 +170,18 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void ext2_destroy_inode(struct inode *inode)
+static void ext2_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
}
+static void ext2_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, ext2_i_callback);
+}
+
static void init_once(void *foo)
{
struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
@@ -1356,10 +1372,10 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
return 0;
}
-static int ext2_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ext2_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super, mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, ext2_fill_super);
}
#ifdef CONFIG_QUOTA
@@ -1473,7 +1489,7 @@ out:
static struct file_system_type ext2_fs_type = {
.owner = THIS_MODULE,
.name = "ext2",
- .get_sb = ext2_get_sb,
+ .mount = ext2_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index f84700b..c2e4dce 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -199,14 +199,6 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
goto found;
entry = next;
}
- /* Check the remaining name entries */
- while (!IS_LAST_ENTRY(entry)) {
- struct ext2_xattr_entry *next =
- EXT2_XATTR_NEXT(entry);
- if ((char *)next >= end)
- goto bad_block;
- entry = next;
- }
if (ext2_xattr_cache_insert(bh))
ea_idebug(inode, "cache insert failed");
error = -ENODATA;
@@ -355,7 +347,7 @@ static void ext2_xattr_update_super_block(struct super_block *sb)
/*
* ext2_xattr_set()
*
- * Create, replace or remove an extended attribute for this inode. Buffer
+ * Create, replace or remove an extended attribute for this inode. Value
* is NULL to remove an existing extended attribute, and non-NULL to
* either replace an existing extended attribute, or create a new extended
* attribute. The flags XATTR_REPLACE and XATTR_CREATE
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 8a11fe2..e4fa49e 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -240,10 +240,17 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
}
int
-ext3_check_acl(struct inode *inode, int mask)
+ext3_check_acl(struct inode *inode, int mask, unsigned int flags)
{
- struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
+ struct posix_acl *acl;
+
+ if (flags & IPERM_FLAG_RCU) {
+ if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+ return -ECHILD;
+ return -EAGAIN;
+ }
+ acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl) {
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 5973346..5faf8048 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -54,7 +54,7 @@ static inline int ext3_acl_count(size_t size)
#ifdef CONFIG_EXT3_FS_POSIX_ACL
/* acl.c */
-extern int ext3_check_acl (struct inode *, int);
+extern int ext3_check_acl (struct inode *, int, unsigned int);
extern int ext3_acl_chmod (struct inode *);
extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index b3db226..045995c 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -20,6 +20,7 @@
#include <linux/ext3_jbd.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
+#include <linux/blkdev.h>
/*
* balloc.c contains the blocks allocation and deallocation routines
@@ -39,6 +40,21 @@
#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
+/*
+ * Calculate the block group number and offset, given a block number
+ */
+static void ext3_get_group_no_and_offset(struct super_block *sb,
+ ext3_fsblk_t blocknr, unsigned long *blockgrpp, ext3_grpblk_t *offsetp)
+{
+ struct ext3_super_block *es = EXT3_SB(sb)->s_es;
+
+ blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
+ if (offsetp)
+ *offsetp = blocknr % EXT3_BLOCKS_PER_GROUP(sb);
+ if (blockgrpp)
+ *blockgrpp = blocknr / EXT3_BLOCKS_PER_GROUP(sb);
+}
+
/**
* ext3_get_group_desc() -- load group descriptor from disk
* @sb: super block
@@ -1885,3 +1901,253 @@ unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
return ext3_bg_num_gdb_meta(sb,group);
}
+
+/**
+ * ext3_trim_all_free -- function to trim all free space in alloc. group
+ * @sb: super block for file system
+ * @group: allocation group to trim
+ * @start: first group block to examine
+ * @max: last group block to examine
+ * @gdp: allocation group description structure
+ * @minblocks: minimum extent block count
+ *
+ * ext3_trim_all_free walks through group's block bitmap searching for free
+ * blocks. When the free block is found, it tries to allocate this block and
+ * consequent free block to get the biggest free extent possible, until it
+ * reaches any used block. Then issue a TRIM command on this extent and free
+ * the extent in the block bitmap. This is done until whole group is scanned.
+ */
+ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
+ ext3_grpblk_t start, ext3_grpblk_t max,
+ ext3_grpblk_t minblocks)
+{
+ handle_t *handle;
+ ext3_grpblk_t next, free_blocks, bit, freed, count = 0;
+ ext3_fsblk_t discard_block;
+ struct ext3_sb_info *sbi;
+ struct buffer_head *gdp_bh, *bitmap_bh = NULL;
+ struct ext3_group_desc *gdp;
+ int err = 0, ret = 0;
+
+ /*
+ * We will update one block bitmap, and one group descriptor
+ */
+ handle = ext3_journal_start_sb(sb, 2);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ bitmap_bh = read_block_bitmap(sb, group);
+ if (!bitmap_bh) {
+ err = -EIO;
+ goto err_out;
+ }
+
+ BUFFER_TRACE(bitmap_bh, "getting undo access");
+ err = ext3_journal_get_undo_access(handle, bitmap_bh);
+ if (err)
+ goto err_out;
+
+ gdp = ext3_get_group_desc(sb, group, &gdp_bh);
+ if (!gdp) {
+ err = -EIO;
+ goto err_out;
+ }
+
+ BUFFER_TRACE(gdp_bh, "get_write_access");
+ err = ext3_journal_get_write_access(handle, gdp_bh);
+ if (err)
+ goto err_out;
+
+ free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
+ sbi = EXT3_SB(sb);
+
+ /* Walk through the whole group */
+ while (start < max) {
+ start = bitmap_search_next_usable_block(start, bitmap_bh, max);
+ if (start < 0)
+ break;
+ next = start;
+
+ /*
+ * Allocate contiguous free extents by setting bits in the
+ * block bitmap
+ */
+ while (next < max
+ && claim_block(sb_bgl_lock(sbi, group),
+ next, bitmap_bh)) {
+ next++;
+ }
+
+ /* We did not claim any blocks */
+ if (next == start)
+ continue;
+
+ discard_block = (ext3_fsblk_t)start +
+ ext3_group_first_block_no(sb, group);
+
+ /* Update counters */
+ spin_lock(sb_bgl_lock(sbi, group));
+ le16_add_cpu(&gdp->bg_free_blocks_count, start - next);
+ spin_unlock(sb_bgl_lock(sbi, group));
+ percpu_counter_sub(&sbi->s_freeblocks_counter, next - start);
+
+ /* Do not issue a TRIM on extents smaller than minblocks */
+ if ((next - start) < minblocks)
+ goto free_extent;
+
+ /* Send the TRIM command down to the device */
+ err = sb_issue_discard(sb, discard_block, next - start,
+ GFP_NOFS, 0);
+ count += (next - start);
+free_extent:
+ freed = 0;
+
+ /*
+ * Clear bits in the bitmap
+ */
+ for (bit = start; bit < next; bit++) {
+ BUFFER_TRACE(bitmap_bh, "clear bit");
+ if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, group),
+ bit, bitmap_bh->b_data)) {
+ ext3_error(sb, __func__,
+ "bit already cleared for block "E3FSBLK,
+ (unsigned long)bit);
+ BUFFER_TRACE(bitmap_bh, "bit already cleared");
+ } else {
+ freed++;
+ }
+ }
+
+ /* Update couters */
+ spin_lock(sb_bgl_lock(sbi, group));
+ le16_add_cpu(&gdp->bg_free_blocks_count, freed);
+ spin_unlock(sb_bgl_lock(sbi, group));
+ percpu_counter_add(&sbi->s_freeblocks_counter, freed);
+
+ start = next;
+ if (err < 0) {
+ if (err != -EOPNOTSUPP)
+ ext3_warning(sb, __func__, "Discard command "
+ "returned error %d\n", err);
+ break;
+ }
+
+ if (fatal_signal_pending(current)) {
+ err = -ERESTARTSYS;
+ break;
+ }
+
+ cond_resched();
+
+ /* No more suitable extents */
+ if ((free_blocks - count) < minblocks)
+ break;
+ }
+
+ /* We dirtied the bitmap block */
+ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
+ ret = ext3_journal_dirty_metadata(handle, bitmap_bh);
+ if (!err)
+ err = ret;
+
+ /* And the group descriptor block */
+ BUFFER_TRACE(gdp_bh, "dirtied group descriptor block");
+ ret = ext3_journal_dirty_metadata(handle, gdp_bh);
+ if (!err)
+ err = ret;
+
+ ext3_debug("trimmed %d blocks in the group %d\n",
+ count, group);
+
+err_out:
+ if (err)
+ count = err;
+ ext3_journal_stop(handle);
+ brelse(bitmap_bh);
+
+ return count;
+}
+
+/**
+ * ext3_trim_fs() -- trim ioctl handle function
+ * @sb: superblock for filesystem
+ * @start: First Byte to trim
+ * @len: number of Bytes to trim from start
+ * @minlen: minimum extent length in Bytes
+ *
+ * ext3_trim_fs goes through all allocation groups containing Bytes from
+ * start to start+len. For each such a group ext3_trim_all_free function
+ * is invoked to trim all free space.
+ */
+int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
+{
+ ext3_grpblk_t last_block, first_block, free_blocks;
+ unsigned long first_group, last_group;
+ unsigned long group, ngroups;
+ struct ext3_group_desc *gdp;
+ struct ext3_super_block *es = EXT3_SB(sb)->s_es;
+ uint64_t start, len, minlen, trimmed;
+ ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count);
+ int ret = 0;
+
+ start = range->start >> sb->s_blocksize_bits;
+ len = range->len >> sb->s_blocksize_bits;
+ minlen = range->minlen >> sb->s_blocksize_bits;
+ trimmed = 0;
+
+ if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
+ return -EINVAL;
+ if (start >= max_blks)
+ goto out;
+ if (start < le32_to_cpu(es->s_first_data_block)) {
+ len -= le32_to_cpu(es->s_first_data_block) - start;
+ start = le32_to_cpu(es->s_first_data_block);
+ }
+ if (start + len > max_blks)
+ len = max_blks - start;
+
+ ngroups = EXT3_SB(sb)->s_groups_count;
+ smp_rmb();
+
+ /* Determine first and last group to examine based on start and len */
+ ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start,
+ &first_group, &first_block);
+ ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) (start + len),
+ &last_group, &last_block);
+ last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
+ last_block = EXT3_BLOCKS_PER_GROUP(sb);
+
+ if (first_group > last_group)
+ return -EINVAL;
+
+ for (group = first_group; group <= last_group; group++) {
+ gdp = ext3_get_group_desc(sb, group, NULL);
+ if (!gdp)
+ break;
+
+ free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
+ if (free_blocks < minlen)
+ continue;
+
+ if (len >= EXT3_BLOCKS_PER_GROUP(sb))
+ len -= (EXT3_BLOCKS_PER_GROUP(sb) - first_block);
+ else
+ last_block = first_block + len;
+
+ ret = ext3_trim_all_free(sb, group, first_block,
+ last_block, minlen);
+ if (ret < 0)
+ break;
+
+ trimmed += ret;
+ first_block = 0;
+ }
+
+ if (ret >= 0)
+ ret = 0;
+
+out:
+ range->len = trimmed * sb->s_blocksize;
+
+ return ret;
+}
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index e2e72c3..34f0a07 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -69,25 +69,26 @@ int ext3_check_dir_entry (const char * function, struct inode * dir,
const char * error_msg = NULL;
const int rlen = ext3_rec_len_from_disk(de->rec_len);
- if (rlen < EXT3_DIR_REC_LEN(1))
+ if (unlikely(rlen < EXT3_DIR_REC_LEN(1)))
error_msg = "rec_len is smaller than minimal";
- else if (rlen % 4 != 0)
+ else if (unlikely(rlen % 4 != 0))
error_msg = "rec_len % 4 != 0";
- else if (rlen < EXT3_DIR_REC_LEN(de->name_len))
+ else if (unlikely(rlen < EXT3_DIR_REC_LEN(de->name_len)))
error_msg = "rec_len is too small for name_len";
- else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
+ else if (unlikely((((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)))
error_msg = "directory entry across blocks";
- else if (le32_to_cpu(de->inode) >
- le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count))
+ else if (unlikely(le32_to_cpu(de->inode) >
+ le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)))
error_msg = "inode out of bounds";
- if (error_msg != NULL)
+ if (unlikely(error_msg != NULL))
ext3_error (dir->i_sb, function,
"bad entry in directory #%lu: %s - "
"offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
dir->i_ino, error_msg, offset,
(unsigned long) le32_to_cpu(de->inode),
rlen, de->name_len);
+
return error_msg == NULL ? 1 : 0;
}
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index a958061..ae94f6d 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2145,13 +2145,15 @@ static void ext3_clear_blocks(handle_t *handle, struct inode *inode,
if (try_to_extend_transaction(handle, inode)) {
if (bh) {
BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
- ext3_journal_dirty_metadata(handle, bh);
+ if (ext3_journal_dirty_metadata(handle, bh))
+ return;
}
ext3_mark_inode_dirty(handle, inode);
truncate_restart_transaction(handle, inode);
if (bh) {
BUFFER_TRACE(bh, "retaking write access");
- ext3_journal_get_write_access(handle, bh);
+ if (ext3_journal_get_write_access(handle, bh))
+ return;
}
}
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 8897481..fc080dd 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -276,7 +276,29 @@ group_add_out:
mnt_drop_write(filp->f_path.mnt);
return err;
}
+ case FITRIM: {
+ struct super_block *sb = inode->i_sb;
+ struct fstrim_range range;
+ int ret = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (copy_from_user(&range, (struct fstrim_range *)arg,
+ sizeof(range)))
+ return -EFAULT;
+
+ ret = ext3_trim_fs(sb, &range);
+ if (ret < 0)
+ return ret;
+
+ if (copy_to_user((struct fstrim_range *)arg, &range,
+ sizeof(range)))
+ return -EFAULT;
+
+ return 0;
+ }
default:
return -ENOTTY;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index bce9dce..b27ba71 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -858,6 +858,7 @@ static struct buffer_head *ext3_find_entry(struct inode *dir,
struct buffer_head * bh_use[NAMEI_RA_SIZE];
struct buffer_head * bh, *ret = NULL;
unsigned long start, block, b;
+ const u8 *name = entry->name;
int ra_max = 0; /* Number of bh's in the readahead
buffer, bh_use[] */
int ra_ptr = 0; /* Current index into readahead
@@ -871,6 +872,16 @@ static struct buffer_head *ext3_find_entry(struct inode *dir,
namelen = entry->len;
if (namelen > EXT3_NAME_LEN)
return NULL;
+ if ((namelen <= 2) && (name[0] == '.') &&
+ (name[1] == '.' || name[1] == 0)) {
+ /*
+ * "." or ".." will only be in the first block
+ * NFS may look up ".."; "." should be handled by the VFS
+ */
+ block = start = 0;
+ nblocks = 1;
+ goto restart;
+ }
if (is_dx(dir)) {
bh = ext3_dx_find_entry(dir, entry, res_dir, &err);
/*
@@ -961,55 +972,35 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
struct qstr *entry, struct ext3_dir_entry_2 **res_dir,
int *err)
{
- struct super_block * sb;
+ struct super_block *sb = dir->i_sb;
struct dx_hash_info hinfo;
- u32 hash;
struct dx_frame frames[2], *frame;
- struct ext3_dir_entry_2 *de, *top;
struct buffer_head *bh;
unsigned long block;
int retval;
- int namelen = entry->len;
- const u8 *name = entry->name;
- sb = dir->i_sb;
- /* NFS may look up ".." - look at dx_root directory block */
- if (namelen > 2 || name[0] != '.'|| (namelen == 2 && name[1] != '.')) {
- if (!(frame = dx_probe(entry, dir, &hinfo, frames, err)))
- return NULL;
- } else {
- frame = frames;
- frame->bh = NULL; /* for dx_release() */
- frame->at = (struct dx_entry *)frames; /* hack for zero entry*/
- dx_set_block(frame->at, 0); /* dx_root block is 0 */
- }
- hash = hinfo.hash;
+ if (!(frame = dx_probe(entry, dir, &hinfo, frames, err)))
+ return NULL;
do {
block = dx_get_block(frame->at);
if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
goto errout;
- de = (struct ext3_dir_entry_2 *) bh->b_data;
- top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
- EXT3_DIR_REC_LEN(0));
- for (; de < top; de = ext3_next_entry(de)) {
- int off = (block << EXT3_BLOCK_SIZE_BITS(sb))
- + ((char *) de - bh->b_data);
-
- if (!ext3_check_dir_entry(__func__, dir, de, bh, off)) {
- brelse(bh);
- *err = ERR_BAD_DX_DIR;
- goto errout;
- }
- if (ext3_match(namelen, name, de)) {
- *res_dir = de;
- dx_release(frames);
- return bh;
- }
+ retval = search_dirblock(bh, dir, entry,
+ block << EXT3_BLOCK_SIZE_BITS(sb),
+ res_dir);
+ if (retval == 1) {
+ dx_release(frames);
+ return bh;
}
- brelse (bh);
+ brelse(bh);
+ if (retval == -1) {
+ *err = ERR_BAD_DX_DIR;
+ goto errout;
+ }
+
/* Check to see if we should continue to search */
- retval = ext3_htree_next_block(dir, hash, frame,
+ retval = ext3_htree_next_block(dir, hinfo.hash, frame,
frames, NULL);
if (retval < 0) {
ext3_warning(sb, __func__,
@@ -1047,7 +1038,7 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
return ERR_PTR(-EIO);
}
inode = ext3_iget(dir->i_sb, ino);
- if (unlikely(IS_ERR(inode))) {
+ if (IS_ERR(inode)) {
if (PTR_ERR(inode) == -ESTALE) {
ext3_error(dir->i_sb, __func__,
"deleted inode referenced: %lu",
@@ -1607,7 +1598,9 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
if (err)
goto journal_error;
}
- ext3_journal_dirty_metadata(handle, frames[0].bh);
+ err = ext3_journal_dirty_metadata(handle, frames[0].bh);
+ if (err)
+ goto journal_error;
}
de = do_split(handle, dir, &bh, frame, &hinfo, &err);
if (!de)
@@ -1644,8 +1637,13 @@ static int ext3_delete_entry (handle_t *handle,
if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i))
return -EIO;
if (de == de_del) {
+ int err;
+
BUFFER_TRACE(bh, "get_write_access");
- ext3_journal_get_write_access(handle, bh);
+ err = ext3_journal_get_write_access(handle, bh);
+ if (err)
+ goto journal_error;
+
if (pde)
pde->rec_len = ext3_rec_len_to_disk(
ext3_rec_len_from_disk(pde->rec_len) +
@@ -1654,7 +1652,12 @@ static int ext3_delete_entry (handle_t *handle,
de->inode = 0;
dir->i_version++;
BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
- ext3_journal_dirty_metadata(handle, bh);
+ err = ext3_journal_dirty_metadata(handle, bh);
+ if (err) {
+journal_error:
+ ext3_std_error(dir->i_sb, err);
+ return err;
+ }
return 0;
}
i += ext3_rec_len_from_disk(de->rec_len);
@@ -1762,7 +1765,7 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
{
handle_t *handle;
struct inode * inode;
- struct buffer_head * dir_block;
+ struct buffer_head * dir_block = NULL;
struct ext3_dir_entry_2 * de;
int err, retries = 0;
@@ -1790,15 +1793,14 @@ retry:
inode->i_fop = &ext3_dir_operations;
inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
dir_block = ext3_bread (handle, inode, 0, 1, &err);
- if (!dir_block) {
- drop_nlink(inode); /* is this nlink == 0? */
- unlock_new_inode(inode);
- ext3_mark_inode_dirty(handle, inode);
- iput (inode);
- goto out_stop;
- }
+ if (!dir_block)
+ goto out_clear_inode;
+
BUFFER_TRACE(dir_block, "get_write_access");
- ext3_journal_get_write_access(handle, dir_block);
+ err = ext3_journal_get_write_access(handle, dir_block);
+ if (err)
+ goto out_clear_inode;
+
de = (struct ext3_dir_entry_2 *) dir_block->b_data;
de->inode = cpu_to_le32(inode->i_ino);
de->name_len = 1;
@@ -1814,11 +1816,16 @@ retry:
ext3_set_de_type(dir->i_sb, de, S_IFDIR);
inode->i_nlink = 2;
BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
- ext3_journal_dirty_metadata(handle, dir_block);
- brelse (dir_block);
- ext3_mark_inode_dirty(handle, inode);
- err = ext3_add_entry (handle, dentry, inode);
+ err = ext3_journal_dirty_metadata(handle, dir_block);
+ if (err)
+ goto out_clear_inode;
+
+ err = ext3_mark_inode_dirty(handle, inode);
+ if (!err)
+ err = ext3_add_entry (handle, dentry, inode);
+
if (err) {
+out_clear_inode:
inode->i_nlink = 0;
unlock_new_inode(inode);
ext3_mark_inode_dirty(handle, inode);
@@ -1827,10 +1834,14 @@ retry:
}
inc_nlink(dir);
ext3_update_dx_flag(dir);
- ext3_mark_inode_dirty(handle, dir);
+ err = ext3_mark_inode_dirty(handle, dir);
+ if (err)
+ goto out_clear_inode;
+
d_instantiate(dentry, inode);
unlock_new_inode(inode);
out_stop:
+ brelse(dir_block);
ext3_journal_stop(handle);
if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
goto retry;
@@ -2353,7 +2364,9 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
goto end_rename;
} else {
BUFFER_TRACE(new_bh, "get write access");
- ext3_journal_get_write_access(handle, new_bh);
+ retval = ext3_journal_get_write_access(handle, new_bh);
+ if (retval)
+ goto journal_error;
new_de->inode = cpu_to_le32(old_inode->i_ino);
if (EXT3_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
EXT3_FEATURE_INCOMPAT_FILETYPE))
@@ -2362,7 +2375,9 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME_SEC;
ext3_mark_inode_dirty(handle, new_dir);
BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata");
- ext3_journal_dirty_metadata(handle, new_bh);
+ retval = ext3_journal_dirty_metadata(handle, new_bh);
+ if (retval)
+ goto journal_error;
brelse(new_bh);
new_bh = NULL;
}
@@ -2411,10 +2426,17 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
ext3_update_dx_flag(old_dir);
if (dir_bh) {
BUFFER_TRACE(dir_bh, "get_write_access");
- ext3_journal_get_write_access(handle, dir_bh);
+ retval = ext3_journal_get_write_access(handle, dir_bh);
+ if (retval)
+ goto journal_error;
PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata");
- ext3_journal_dirty_metadata(handle, dir_bh);
+ retval = ext3_journal_dirty_metadata(handle, dir_bh);
+ if (retval) {
+journal_error:
+ ext3_std_error(new_dir->i_sb, retval);
+ goto end_rename;
+ }
drop_nlink(old_dir);
if (new_inode) {
drop_nlink(new_inode);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index e746d30..108b142 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -249,7 +249,11 @@ static int setup_new_group_blocks(struct super_block *sb,
memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
set_buffer_uptodate(gdb);
unlock_buffer(gdb);
- ext3_journal_dirty_metadata(handle, gdb);
+ err = ext3_journal_dirty_metadata(handle, gdb);
+ if (err) {
+ brelse(gdb);
+ goto exit_bh;
+ }
ext3_set_bit(bit, bh->b_data);
brelse(gdb);
}
@@ -269,7 +273,11 @@ static int setup_new_group_blocks(struct super_block *sb,
err = PTR_ERR(gdb);
goto exit_bh;
}
- ext3_journal_dirty_metadata(handle, gdb);
+ err = ext3_journal_dirty_metadata(handle, gdb);
+ if (err) {
+ brelse(gdb);
+ goto exit_bh;
+ }
ext3_set_bit(bit, bh->b_data);
brelse(gdb);
}
@@ -295,7 +303,11 @@ static int setup_new_group_blocks(struct super_block *sb,
err = PTR_ERR(it);
goto exit_bh;
}
- ext3_journal_dirty_metadata(handle, it);
+ err = ext3_journal_dirty_metadata(handle, it);
+ if (err) {
+ brelse(it);
+ goto exit_bh;
+ }
brelse(it);
ext3_set_bit(bit, bh->b_data);
}
@@ -306,7 +318,9 @@ static int setup_new_group_blocks(struct super_block *sb,
mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb),
bh->b_data);
- ext3_journal_dirty_metadata(handle, bh);
+ err = ext3_journal_dirty_metadata(handle, bh);
+ if (err)
+ goto exit_bh;
brelse(bh);
/* Mark unused entries in inode bitmap used */
@@ -319,7 +333,7 @@ static int setup_new_group_blocks(struct super_block *sb,
mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb),
bh->b_data);
- ext3_journal_dirty_metadata(handle, bh);
+ err = ext3_journal_dirty_metadata(handle, bh);
exit_bh:
brelse(bh);
@@ -503,12 +517,19 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
* reserved inode, and will become GDT blocks (primary and backup).
*/
data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0;
- ext3_journal_dirty_metadata(handle, dind);
+ err = ext3_journal_dirty_metadata(handle, dind);
+ if (err)
+ goto exit_group_desc;
brelse(dind);
+ dind = NULL;
inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
- ext3_mark_iloc_dirty(handle, inode, &iloc);
+ err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+ if (err)
+ goto exit_group_desc;
memset((*primary)->b_data, 0, sb->s_blocksize);
- ext3_journal_dirty_metadata(handle, *primary);
+ err = ext3_journal_dirty_metadata(handle, *primary);
+ if (err)
+ goto exit_group_desc;
o_group_desc = EXT3_SB(sb)->s_group_desc;
memcpy(n_group_desc, o_group_desc,
@@ -519,10 +540,14 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
kfree(o_group_desc);
le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
- ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+ if (err)
+ goto exit_inode;
return 0;
+exit_group_desc:
+ kfree(n_group_desc);
exit_inode:
//ext3_journal_release_buffer(handle, iloc.bh);
brelse(iloc.bh);
@@ -706,16 +731,20 @@ static void update_backups(struct super_block *sb,
}
ext3_debug("update metadata backup %#04lx\n",
(unsigned long)bh->b_blocknr);
- if ((err = ext3_journal_get_write_access(handle, bh)))
+ if ((err = ext3_journal_get_write_access(handle, bh))) {
+ brelse(bh);
break;
+ }
lock_buffer(bh);
memcpy(bh->b_data, data, size);
if (rest)
memset(bh->b_data + size, 0, rest);
set_buffer_uptodate(bh);
unlock_buffer(bh);
- ext3_journal_dirty_metadata(handle, bh);
+ err = ext3_journal_dirty_metadata(handle, bh);
brelse(bh);
+ if (err)
+ break;
}
if ((err2 = ext3_journal_stop(handle)) && !err)
err = err2;
@@ -922,7 +951,9 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
/* Update the global fs size fields */
sbi->s_groups_count++;
- ext3_journal_dirty_metadata(handle, primary);
+ err = ext3_journal_dirty_metadata(handle, primary);
+ if (err)
+ goto exit_journal;
/* Update the reserved block counts only once the new group is
* active. */
@@ -934,7 +965,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
percpu_counter_add(&sbi->s_freeinodes_counter,
EXT3_INODES_PER_GROUP(sb));
- ext3_journal_dirty_metadata(handle, sbi->s_sbh);
+ err = ext3_journal_dirty_metadata(handle, sbi->s_sbh);
exit_journal:
mutex_unlock(&sbi->s_resize_lock);
@@ -1064,8 +1095,14 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
goto exit_put;
}
es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
- ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
+ if (err) {
+ ext3_warning(sb, __func__,
+ "error %d on journal dirty metadata", err);
+ ext3_journal_stop(handle);
+ goto exit_put;
+ }
ext3_debug("freeing blocks "E3FSBLK" through "E3FSBLK"\n",
o_blocks_count, o_blocks_count + add);
ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index db87413..b7d0554 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -27,7 +27,6 @@
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/parser.h>
-#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/exportfs.h>
#include <linux/vfs.h>
@@ -144,12 +143,16 @@ void ext3_journal_abort_handle(const char *caller, const char *err_fn,
void ext3_msg(struct super_block *sb, const char *prefix,
const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk("%sEXT3-fs (%s): ", prefix, sb->s_id);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk("%sEXT3-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
+
va_end(args);
}
@@ -196,15 +199,20 @@ static void ext3_handle_error(struct super_block *sb)
sb->s_id);
}
-void ext3_error (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext3_error(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_CRIT "EXT3-fs error (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
ext3_handle_error(sb);
@@ -275,15 +283,20 @@ void __ext3_std_error (struct super_block * sb, const char * function,
* case we take the easy way out and panic immediately.
*/
-void ext3_abort (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext3_abort(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_CRIT "EXT3-fs (%s): error: %s: ", sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_CRIT "EXT3-fs (%s): error: %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
if (test_opt(sb, ERRORS_PANIC))
@@ -301,16 +314,20 @@ void ext3_abort (struct super_block * sb, const char * function,
journal_abort(EXT3_SB(sb)->s_journal, -EIO);
}
-void ext3_warning (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext3_warning(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_WARNING "EXT3-fs (%s): warning: %s: ",
- sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_WARNING "EXT3-fs (%s): warning: %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
}
@@ -480,6 +497,13 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
+static void ext3_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
+}
+
static void ext3_destroy_inode(struct inode *inode)
{
if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
@@ -490,7 +514,7 @@ static void ext3_destroy_inode(struct inode *inode)
false);
dump_stack();
}
- kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
+ call_rcu(&inode->i_rcu, ext3_i_callback);
}
static void init_once(void *foo)
@@ -1842,13 +1866,15 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
goto failed_mount;
}
- if (generic_check_addressable(sb->s_blocksize_bits,
- le32_to_cpu(es->s_blocks_count))) {
+ err = generic_check_addressable(sb->s_blocksize_bits,
+ le32_to_cpu(es->s_blocks_count));
+ if (err) {
ext3_msg(sb, KERN_ERR,
"error: filesystem is too large to mount safely");
if (sizeof(sector_t) < 8)
ext3_msg(sb, KERN_ERR,
"error: CONFIG_LBDAF not enabled");
+ ret = err;
goto failed_mount;
}
@@ -2291,7 +2317,7 @@ static int ext3_load_journal(struct super_block *sb,
EXT3_SB(sb)->s_journal = journal;
ext3_clear_journal_err(sb, es);
- if (journal_devnum &&
+ if (!really_read_only && journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
es->s_journal_dev = cpu_to_le32(journal_devnum);
@@ -3020,16 +3046,16 @@ out:
#endif
-static int ext3_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ext3_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, ext3_fill_super);
}
static struct file_system_type ext3_fs_type = {
.owner = THIS_MODULE,
.name = "ext3",
- .get_sb = ext3_get_sb,
+ .mount = ext3_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index e69dc6d..32e6cc2 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -925,7 +925,7 @@ ext3_xattr_ibody_set(handle_t *handle, struct inode *inode,
/*
* ext3_xattr_set_handle()
*
- * Create, replace or remove an extended attribute for this inode. Buffer
+ * Create, replace or remove an extended attribute for this inode. Value
* is NULL to remove an existing extended attribute, and non-NULL to
* either replace an existing extended attribute, or create a new extended
* attribute. The flags XATTR_REPLACE and XATTR_CREATE
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 5e2ed45..e0270d1 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -238,10 +238,17 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
}
int
-ext4_check_acl(struct inode *inode, int mask)
+ext4_check_acl(struct inode *inode, int mask, unsigned int flags)
{
- struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS);
+ struct posix_acl *acl;
+
+ if (flags & IPERM_FLAG_RCU) {
+ if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+ return -ECHILD;
+ return -EAGAIN;
+ }
+ acl = ext4_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl) {
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 9d843d5..dec8211 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -54,7 +54,7 @@ static inline int ext4_acl_count(size_t size)
#ifdef CONFIG_EXT4_FS_POSIX_ACL
/* acl.c */
-extern int ext4_check_acl(struct inode *, int);
+extern int ext4_check_acl(struct inode *, int, unsigned int);
extern int ext4_acl_chmod(struct inode *);
extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 14c3af2..adf96b8 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -592,7 +592,8 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
* Account for the allocated meta blocks. We will never
* fail EDQUOT for metdata, but we do account for it.
*/
- if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) {
+ if (!(*errp) &&
+ ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ece76fb..164c560 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -60,9 +60,13 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
return (ext4_filetype_table[filetype]);
}
-
+/*
+ * Return 0 if the directory entry is OK, and 1 if there is a problem
+ *
+ * Note: this is the opposite of what ext2 and ext3 historically returned...
+ */
int __ext4_check_dir_entry(const char *function, unsigned int line,
- struct inode *dir,
+ struct inode *dir, struct file *filp,
struct ext4_dir_entry_2 *de,
struct buffer_head *bh,
unsigned int offset)
@@ -71,26 +75,37 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
const int rlen = ext4_rec_len_from_disk(de->rec_len,
dir->i_sb->s_blocksize);
- if (rlen < EXT4_DIR_REC_LEN(1))
+ if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))
error_msg = "rec_len is smaller than minimal";
- else if (rlen % 4 != 0)
+ else if (unlikely(rlen % 4 != 0))
error_msg = "rec_len % 4 != 0";
- else if (rlen < EXT4_DIR_REC_LEN(de->name_len))
+ else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
error_msg = "rec_len is too small for name_len";
- else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
+ else if (unlikely(((char *) de - bh->b_data) + rlen >
+ dir->i_sb->s_blocksize))
error_msg = "directory entry across blocks";
- else if (le32_to_cpu(de->inode) >
- le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count))
+ else if (unlikely(le32_to_cpu(de->inode) >
+ le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
error_msg = "inode out of bounds";
+ else
+ return 0;
- if (error_msg != NULL)
- ext4_error_inode(dir, function, line, bh->b_blocknr,
- "bad entry in directory: %s - "
- "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d",
- error_msg, (unsigned) (offset%bh->b_size), offset,
- le32_to_cpu(de->inode),
- rlen, de->name_len);
- return error_msg == NULL ? 1 : 0;
+ if (filp)
+ ext4_error_file(filp, function, line, bh ? bh->b_blocknr : 0,
+ "bad entry in directory: %s - offset=%u(%u), "
+ "inode=%u, rec_len=%d, name_len=%d",
+ error_msg, (unsigned) (offset%bh->b_size),
+ offset, le32_to_cpu(de->inode),
+ rlen, de->name_len);
+ else
+ ext4_error_inode(dir, function, line, bh ? bh->b_blocknr : 0,
+ "bad entry in directory: %s - offset=%u(%u), "
+ "inode=%u, rec_len=%d, name_len=%d",
+ error_msg, (unsigned) (offset%bh->b_size),
+ offset, le32_to_cpu(de->inode),
+ rlen, de->name_len);
+
+ return 1;
}
static int ext4_readdir(struct file *filp,
@@ -152,8 +167,9 @@ static int ext4_readdir(struct file *filp,
*/
if (!bh) {
if (!dir_has_error) {
- EXT4_ERROR_INODE(inode, "directory "
- "contains a hole at offset %Lu",
+ EXT4_ERROR_FILE(filp, 0,
+ "directory contains a "
+ "hole at offset %llu",
(unsigned long long) filp->f_pos);
dir_has_error = 1;
}
@@ -194,8 +210,8 @@ revalidate:
while (!error && filp->f_pos < inode->i_size
&& offset < sb->s_blocksize) {
de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
- if (!ext4_check_dir_entry(inode, de,
- bh, offset)) {
+ if (ext4_check_dir_entry(inode, filp, de,
+ bh, offset)) {
/*
* On error, skip the f_pos to the next block
*/
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8b5dd636..1de65f5 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -62,8 +62,8 @@
#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \
ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)
-#define EXT4_ERROR_FILE(file, fmt, a...) \
- ext4_error_file(__func__, __LINE__, (file), (fmt), ## a)
+#define EXT4_ERROR_FILE(file, block, fmt, a...) \
+ ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)
/* data type for block offset of block group */
typedef int ext4_grpblk_t;
@@ -177,7 +177,7 @@ struct mpage_da_data {
struct ext4_io_page {
struct page *p_page;
- int p_count;
+ atomic_t p_count;
};
#define MAX_IO_PAGES 128
@@ -561,23 +561,7 @@ struct ext4_new_group_data {
#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
#endif
-
-/*
- * Mount options
- */
-struct ext4_mount_options {
- unsigned long s_mount_opt;
- uid_t s_resuid;
- gid_t s_resgid;
- unsigned long s_commit_interval;
- u32 s_min_batch_time, s_max_batch_time;
-#ifdef CONFIG_QUOTA
- int s_jquota_fmt;
- char *s_qf_names[MAXQUOTAS];
-#endif
-};
-
-/* Max physical block we can addres w/o extents */
+/* Max physical block we can address w/o extents */
#define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF
/*
@@ -709,6 +693,8 @@ do { \
if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
ext4_decode_extra_time(&(inode)->xtime, \
raw_inode->xtime ## _extra); \
+ else \
+ (inode)->xtime.tv_nsec = 0; \
} while (0)
#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
@@ -719,6 +705,8 @@ do { \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
ext4_decode_extra_time(&(einode)->xtime, \
raw_inode->xtime ## _extra); \
+ else \
+ (einode)->xtime.tv_nsec = 0; \
} while (0)
#define i_disk_version osd1.linux1.l_i_version
@@ -750,12 +738,13 @@ do { \
/*
* storage for cached extent
+ * If ec_len == 0, then the cache is invalid.
+ * If ec_start == 0, then the cache represents a gap (null mapping)
*/
struct ext4_ext_cache {
ext4_fsblk_t ec_start;
ext4_lblk_t ec_block;
__u32 ec_len; /* must be 32bit to return holes */
- __u32 ec_type;
};
/*
@@ -774,10 +763,12 @@ struct ext4_inode_info {
* near to their parent directory's inode.
*/
ext4_group_t i_block_group;
+ ext4_lblk_t i_dir_start_lookup;
+#if (BITS_PER_LONG < 64)
unsigned long i_state_flags; /* Dynamic state flags */
+#endif
unsigned long i_flags;
- ext4_lblk_t i_dir_start_lookup;
#ifdef CONFIG_EXT4_FS_XATTR
/*
* Extended attributes can be read independently of the main file
@@ -820,7 +811,7 @@ struct ext4_inode_info {
*/
struct rw_semaphore i_data_sem;
struct inode vfs_inode;
- struct jbd2_inode jinode;
+ struct jbd2_inode *jinode;
struct ext4_ext_cache i_cached_extent;
/*
@@ -840,14 +831,12 @@ struct ext4_inode_info {
unsigned int i_reserved_data_blocks;
unsigned int i_reserved_meta_blocks;
unsigned int i_allocated_meta_blocks;
- unsigned short i_delalloc_reserved_flag;
- sector_t i_da_metadata_calc_last_lblock;
+ ext4_lblk_t i_da_metadata_calc_last_lblock;
int i_da_metadata_calc_len;
/* on-disk additional length */
__u16 i_extra_isize;
- spinlock_t i_block_reservation_lock;
#ifdef CONFIG_QUOTA
/* quota space reservation, managed internally by quota code */
qsize_t i_reserved_quota;
@@ -856,9 +845,12 @@ struct ext4_inode_info {
/* completed IOs that might need unwritten extents handling */
struct list_head i_completed_io_list;
spinlock_t i_completed_io_lock;
+ atomic_t i_ioend_count; /* Number of outstanding io_end structs */
/* current io_end structure for async DIO write*/
ext4_io_end_t *cur_aio_dio;
+ spinlock_t i_block_reservation_lock;
+
/*
* Transactions that contain inode's metadata needed to complete
* fsync and fdatasync, respectively.
@@ -909,17 +901,27 @@ struct ext4_inode_info {
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
+#define EXT4_MOUNT_MBLK_IO_SUBMIT 0x4000000 /* multi-block io submits */
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
-#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
-#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
+#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
+ ~EXT4_MOUNT_##opt
+#define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \
+ EXT4_MOUNT_##opt
#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \
EXT4_MOUNT_##opt)
+#define clear_opt2(sb, opt) EXT4_SB(sb)->s_mount_opt2 &= \
+ ~EXT4_MOUNT2_##opt
+#define set_opt2(sb, opt) EXT4_SB(sb)->s_mount_opt2 |= \
+ EXT4_MOUNT2_##opt
+#define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \
+ EXT4_MOUNT2_##opt)
+
#define ext4_set_bit ext2_set_bit
#define ext4_set_bit_atomic ext2_set_bit_atomic
#define ext4_clear_bit ext2_clear_bit
@@ -1085,6 +1087,7 @@ struct ext4_sb_info {
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
struct buffer_head **s_group_desc;
unsigned int s_mount_opt;
+ unsigned int s_mount_opt2;
unsigned int s_mount_flags;
ext4_fsblk_t s_sb_block;
uid_t s_resuid;
@@ -1235,24 +1238,39 @@ enum {
EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */
EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
EXT4_STATE_NEWENTRY, /* File just added to dir */
+ EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */
};
-#define EXT4_INODE_BIT_FNS(name, field) \
+#define EXT4_INODE_BIT_FNS(name, field, offset) \
static inline int ext4_test_inode_##name(struct inode *inode, int bit) \
{ \
- return test_bit(bit, &EXT4_I(inode)->i_##field); \
+ return test_bit(bit + (offset), &EXT4_I(inode)->i_##field); \
} \
static inline void ext4_set_inode_##name(struct inode *inode, int bit) \
{ \
- set_bit(bit, &EXT4_I(inode)->i_##field); \
+ set_bit(bit + (offset), &EXT4_I(inode)->i_##field); \
} \
static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \
{ \
- clear_bit(bit, &EXT4_I(inode)->i_##field); \
+ clear_bit(bit + (offset), &EXT4_I(inode)->i_##field); \
+}
+
+EXT4_INODE_BIT_FNS(flag, flags, 0)
+#if (BITS_PER_LONG < 64)
+EXT4_INODE_BIT_FNS(state, state_flags, 0)
+
+static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
+{
+ (ei)->i_state_flags = 0;
}
+#else
+EXT4_INODE_BIT_FNS(state, flags, 32)
-EXT4_INODE_BIT_FNS(flag, flags)
-EXT4_INODE_BIT_FNS(state, state_flags)
+static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
+{
+ /* We depend on the fact that callers will set i_flags */
+}
+#endif
#else
/* Assume that user mode programs are passing in an ext4fs superblock, not
* a kernel struct super_block. This will allow us to call the feature-test
@@ -1640,10 +1658,12 @@ extern unsigned ext4_init_block_bitmap(struct super_block *sb,
/* dir.c */
extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
+ struct file *,
struct ext4_dir_entry_2 *,
struct buffer_head *, unsigned int);
-#define ext4_check_dir_entry(dir, de, bh, offset) \
- __ext4_check_dir_entry(__func__, __LINE__, (dir), (de), (bh), (offset))
+#define ext4_check_dir_entry(dir, filp, de, bh, offset) \
+ unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \
+ (de), (bh), (offset)))
extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
__u32 minor_hash,
struct ext4_dir_entry_2 *dirent);
@@ -1651,6 +1671,7 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p);
/* fsync.c */
extern int ext4_sync_file(struct file *, int);
+extern int ext4_flush_completed_IO(struct inode *);
/* hash.c */
extern int ext4fs_dirhash(const char *name, int len, struct
@@ -1750,8 +1771,8 @@ extern void ext4_error_inode(struct inode *, const char *, unsigned int,
ext4_fsblk_t, const char *, ...)
__attribute__ ((format (printf, 5, 6)));
extern void ext4_error_file(struct file *, const char *, unsigned int,
- const char *, ...)
- __attribute__ ((format (printf, 4, 5)));
+ ext4_fsblk_t, const char *, ...)
+ __attribute__ ((format (printf, 5, 6)));
extern void __ext4_std_error(struct super_block *, const char *,
unsigned int, int);
extern void __ext4_abort(struct super_block *, const char *, unsigned int,
@@ -2060,6 +2081,7 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
/* page-io.c */
extern int __init ext4_init_pageio(void);
extern void ext4_exit_pageio(void);
+extern void ext4_ioend_wait(struct inode *);
extern void ext4_free_io_end(ext4_io_end_t *io);
extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
extern int ext4_end_io_nolock(ext4_io_end_t *io);
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 28ce70f..2e29abb 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -119,10 +119,6 @@ struct ext4_ext_path {
* structure for external API
*/
-#define EXT4_EXT_CACHE_NO 0
-#define EXT4_EXT_CACHE_GAP 1
-#define EXT4_EXT_CACHE_EXTENT 2
-
/*
* to be called by ext4_ext_walk_space()
* negative retcode - error
@@ -197,7 +193,7 @@ static inline unsigned short ext_depth(struct inode *inode)
static inline void
ext4_ext_invalidate_cache(struct inode *inode)
{
- EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO;
+ EXT4_I(inode)->i_cached_extent.ec_len = 0;
}
static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
@@ -278,7 +274,7 @@ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
}
extern int ext4_ext_calc_metadata_amount(struct inode *inode,
- sector_t lblocks);
+ ext4_lblk_t lblocks);
extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
int num,
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index b0bd792..d8b992e 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -253,7 +253,7 @@ static inline int ext4_journal_force_commit(journal_t *journal)
static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
{
if (ext4_handle_valid(handle))
- return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode);
+ return jbd2_journal_file_inode(handle, EXT4_I(inode)->jinode);
return 0;
}
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 0554c48..d202d76 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -117,11 +117,33 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
struct ext4_extent *ex;
depth = path->p_depth;
- /* try to predict block placement */
+ /*
+ * Try to predict block placement assuming that we are
+ * filling in a file which will eventually be
+ * non-sparse --- i.e., in the case of libbfd writing
+ * an ELF object sections out-of-order but in a way
+ * the eventually results in a contiguous object or
+ * executable file, or some database extending a table
+ * space file. However, this is actually somewhat
+ * non-ideal if we are writing a sparse file such as
+ * qemu or KVM writing a raw image file that is going
+ * to stay fairly sparse, since it will end up
+ * fragmenting the file system's free space. Maybe we
+ * should have some hueristics or some way to allow
+ * userspace to pass a hint to file system,
+ * especiially if the latter case turns out to be
+ * common.
+ */
ex = path[depth].p_ext;
- if (ex)
- return (ext4_ext_pblock(ex) +
- (block - le32_to_cpu(ex->ee_block)));
+ if (ex) {
+ ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
+ ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);
+
+ if (block > ext_block)
+ return ext_pblk + (block - ext_block);
+ else
+ return ext_pblk - (ext_block - block);
+ }
/* it looks like index is empty;
* try to find starting block from index itself */
@@ -244,7 +266,7 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
* to allocate @blocks
* Worse case is one block per extent
*/
-int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
+int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
{
struct ext4_inode_info *ei = EXT4_I(inode);
int idxs, num = 0;
@@ -1872,12 +1894,10 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
cbex.ec_block = start;
cbex.ec_len = end - start;
cbex.ec_start = 0;
- cbex.ec_type = EXT4_EXT_CACHE_GAP;
} else {
cbex.ec_block = le32_to_cpu(ex->ee_block);
cbex.ec_len = ext4_ext_get_actual_len(ex);
cbex.ec_start = ext4_ext_pblock(ex);
- cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
}
if (unlikely(cbex.ec_len == 0)) {
@@ -1917,13 +1937,12 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
static void
ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
- __u32 len, ext4_fsblk_t start, int type)
+ __u32 len, ext4_fsblk_t start)
{
struct ext4_ext_cache *cex;
BUG_ON(len == 0);
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
cex = &EXT4_I(inode)->i_cached_extent;
- cex->ec_type = type;
cex->ec_block = block;
cex->ec_len = len;
cex->ec_start = start;
@@ -1976,15 +1995,18 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
}
ext_debug(" -> %u:%lu\n", lblock, len);
- ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP);
+ ext4_ext_put_in_cache(inode, lblock, len, 0);
}
+/*
+ * Return 0 if cache is invalid; 1 if the cache is valid
+ */
static int
ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
struct ext4_extent *ex)
{
struct ext4_ext_cache *cex;
- int ret = EXT4_EXT_CACHE_NO;
+ int ret = 0;
/*
* We borrow i_block_reservation_lock to protect i_cached_extent
@@ -1993,11 +2015,9 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
cex = &EXT4_I(inode)->i_cached_extent;
/* has cache valid data? */
- if (cex->ec_type == EXT4_EXT_CACHE_NO)
+ if (cex->ec_len == 0)
goto errout;
- BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
- cex->ec_type != EXT4_EXT_CACHE_EXTENT);
if (in_range(block, cex->ec_block, cex->ec_len)) {
ex->ee_block = cpu_to_le32(cex->ec_block);
ext4_ext_store_pblock(ex, cex->ec_start);
@@ -2005,7 +2025,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
ext_debug("%u cached by %u:%u:%llu\n",
block,
cex->ec_block, cex->ec_len, cex->ec_start);
- ret = cex->ec_type;
+ ret = 1;
}
errout:
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
@@ -2825,14 +2845,14 @@ fix_extent_len:
* to an uninitialized extent.
*
* Writing to an uninitized extent may result in splitting the uninitialized
- * extent into multiple /intialized unintialized extents (up to three)
+ * extent into multiple /initialized uninitialized extents (up to three)
* There are three possibilities:
* a> There is no split required: Entire extent should be uninitialized
* b> Splits in two extents: Write is happening at either end of the extent
* c> Splits in three extents: Somone is writing in middle of the extent
*
* One of more index blocks maybe needed if the extent tree grow after
- * the unintialized extent split. To prevent ENOSPC occur at the IO
+ * the uninitialized extent split. To prevent ENOSPC occur at the IO
* complete, we need to split the uninitialized extent before DIO submit
* the IO. The uninitialized extent called at this time will be split
* into three uninitialized extent(at most). After IO complete, the part
@@ -3082,7 +3102,7 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev,
* Handle EOFBLOCKS_FL flag, clearing it if necessary
*/
static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
- struct ext4_map_blocks *map,
+ ext4_lblk_t lblk,
struct ext4_ext_path *path,
unsigned int len)
{
@@ -3112,7 +3132,7 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
* this turns out to be false, we can bail out from this
* function immediately.
*/
- if (map->m_lblk + len < le32_to_cpu(last_ex->ee_block) +
+ if (lblk + len < le32_to_cpu(last_ex->ee_block) +
ext4_ext_get_actual_len(last_ex))
return 0;
/*
@@ -3168,8 +3188,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
path);
if (ret >= 0) {
ext4_update_inode_fsync_trans(handle, inode, 1);
- err = check_eofblocks_fl(handle, inode, map, path,
- map->m_len);
+ err = check_eofblocks_fl(handle, inode, map->m_lblk,
+ path, map->m_len);
} else
err = ret;
goto out2;
@@ -3199,7 +3219,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
if (ret >= 0) {
ext4_update_inode_fsync_trans(handle, inode, 1);
- err = check_eofblocks_fl(handle, inode, map, path, map->m_len);
+ err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
+ map->m_len);
if (err < 0)
goto out2;
}
@@ -3276,7 +3297,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_extent_header *eh;
struct ext4_extent newex, *ex;
ext4_fsblk_t newblock;
- int err = 0, depth, ret, cache_type;
+ int err = 0, depth, ret;
unsigned int allocated = 0;
struct ext4_allocation_request ar;
ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
@@ -3285,9 +3306,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
map->m_lblk, map->m_len, inode->i_ino);
/* check in cache */
- cache_type = ext4_ext_in_cache(inode, map->m_lblk, &newex);
- if (cache_type) {
- if (cache_type == EXT4_EXT_CACHE_GAP) {
+ if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
+ if (!newex.ee_start_lo && !newex.ee_start_hi) {
if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
/*
* block isn't allocated yet and
@@ -3296,7 +3316,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
goto out2;
}
/* we should allocate requested block */
- } else if (cache_type == EXT4_EXT_CACHE_EXTENT) {
+ } else {
/* block is already allocated */
newblock = map->m_lblk
- le32_to_cpu(newex.ee_block)
@@ -3305,8 +3325,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
allocated = ext4_ext_get_actual_len(&newex) -
(map->m_lblk - le32_to_cpu(newex.ee_block));
goto out;
- } else {
- BUG();
}
}
@@ -3357,8 +3375,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
/* Do not put uninitialized extent in the cache */
if (!ext4_ext_is_uninitialized(ex)) {
ext4_ext_put_in_cache(inode, ee_block,
- ee_len, ee_start,
- EXT4_EXT_CACHE_EXTENT);
+ ee_len, ee_start);
goto out;
}
ret = ext4_ext_handle_uninitialized_extents(handle,
@@ -3456,7 +3473,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
map->m_flags |= EXT4_MAP_UNINIT;
}
- err = check_eofblocks_fl(handle, inode, map, path, ar.len);
+ err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len);
if (err)
goto out2;
@@ -3490,8 +3507,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* when it is _not_ an uninitialized extent.
*/
if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
- ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock,
- EXT4_EXT_CACHE_EXTENT);
+ ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock);
ext4_update_inode_fsync_trans(handle, inode, 1);
} else
ext4_update_inode_fsync_trans(handle, inode, 0);
@@ -3519,6 +3535,12 @@ void ext4_ext_truncate(struct inode *inode)
int err = 0;
/*
+ * finish any pending end_io work so we won't run the risk of
+ * converting any truncated blocks to initialized later
+ */
+ ext4_flush_completed_IO(inode);
+
+ /*
* probably first extent we're gonna free will be last in block
*/
err = ext4_writepage_trans_blocks(inode);
@@ -3767,7 +3789,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
logical = (__u64)newex->ec_block << blksize_bits;
- if (newex->ec_type == EXT4_EXT_CACHE_GAP) {
+ if (newex->ec_start == 0) {
pgoff_t offset;
struct page *page;
struct buffer_head *bh = NULL;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 5a5c55d..bb003dc 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -104,6 +104,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
{
struct super_block *sb = inode->i_sb;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct ext4_inode_info *ei = EXT4_I(inode);
struct vfsmount *mnt = filp->f_path.mnt;
struct path path;
char buf[64], *cp;
@@ -127,6 +128,27 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
ext4_mark_super_dirty(sb);
}
}
+ /*
+ * Set up the jbd2_inode if we are opening the inode for
+ * writing and the journal is present
+ */
+ if (sbi->s_journal && !ei->jinode && (filp->f_mode & FMODE_WRITE)) {
+ struct jbd2_inode *jinode = jbd2_alloc_inode(GFP_KERNEL);
+
+ spin_lock(&inode->i_lock);
+ if (!ei->jinode) {
+ if (!jinode) {
+ spin_unlock(&inode->i_lock);
+ return -ENOMEM;
+ }
+ ei->jinode = jinode;
+ jbd2_journal_init_jbd_inode(ei->jinode, inode);
+ jinode = NULL;
+ }
+ spin_unlock(&inode->i_lock);
+ if (unlikely(jinode != NULL))
+ jbd2_free_inode(jinode);
+ }
return dquot_file_open(inode, filp);
}
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index c1a7bc9..7829b28 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -75,7 +75,7 @@ static void dump_completed_IO(struct inode * inode)
* to written.
* The function return the number of pending IOs on success.
*/
-static int flush_completed_IO(struct inode *inode)
+extern int ext4_flush_completed_IO(struct inode *inode)
{
ext4_io_end_t *io;
struct ext4_inode_info *ei = EXT4_I(inode);
@@ -169,7 +169,7 @@ int ext4_sync_file(struct file *file, int datasync)
if (inode->i_sb->s_flags & MS_RDONLY)
return 0;
- ret = flush_completed_IO(inode);
+ ret = ext4_flush_completed_IO(inode);
if (ret < 0)
return ret;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 1ce240a..eb9097a 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1027,7 +1027,7 @@ got:
inode->i_generation = sbi->s_next_generation++;
spin_unlock(&sbi->s_next_gen_lock);
- ei->i_state_flags = 0;
+ ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
ext4_set_inode_state(inode, EXT4_STATE_NEW);
ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2d6c6c8..9f7f9e4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -39,7 +39,9 @@
#include <linux/bio.h>
#include <linux/workqueue.h>
#include <linux/kernel.h>
+#include <linux/printk.h>
#include <linux/slab.h>
+#include <linux/ratelimit.h>
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -53,10 +55,18 @@
static inline int ext4_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
- return jbd2_journal_begin_ordered_truncate(
- EXT4_SB(inode->i_sb)->s_journal,
- &EXT4_I(inode)->jinode,
- new_size);
+ trace_ext4_begin_ordered_truncate(inode, new_size);
+ /*
+ * If jinode is zero, then we never opened the file for
+ * writing, so there's no need to call
+ * jbd2_journal_begin_ordered_truncate() since there's no
+ * outstanding writes we need to flush.
+ */
+ if (!EXT4_I(inode)->jinode)
+ return 0;
+ return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode),
+ EXT4_I(inode)->jinode,
+ new_size);
}
static void ext4_invalidatepage(struct page *page, unsigned long offset);
@@ -178,6 +188,7 @@ void ext4_evict_inode(struct inode *inode)
handle_t *handle;
int err;
+ trace_ext4_evict_inode(inode);
if (inode->i_nlink) {
truncate_inode_pages(&inode->i_data, 0);
goto no_delete;
@@ -550,7 +561,7 @@ static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
}
/**
- * ext4_blks_to_allocate: Look up the block map and count the number
+ * ext4_blks_to_allocate - Look up the block map and count the number
* of direct blocks need to be allocated for the given branch.
*
* @branch: chain of indirect blocks
@@ -589,13 +600,19 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
/**
* ext4_alloc_blocks: multiple allocate blocks needed for a branch
+ * @handle: handle for this transaction
+ * @inode: inode which needs allocated blocks
+ * @iblock: the logical block to start allocated at
+ * @goal: preferred physical block of allocation
* @indirect_blks: the number of blocks need to allocate for indirect
* blocks
- *
+ * @blks: number of desired blocks
* @new_blocks: on return it will store the new block numbers for
* the indirect blocks(if needed) and the first direct block,
- * @blks: on return it will store the total number of allocated
- * direct blocks
+ * @err: on return it will store the error code
+ *
+ * This function will return the number of blocks allocated as
+ * requested by the passed-in parameters.
*/
static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock, ext4_fsblk_t goal,
@@ -709,9 +726,11 @@ failed_out:
/**
* ext4_alloc_branch - allocate and set up a chain of blocks.
+ * @handle: handle for this transaction
* @inode: owner
* @indirect_blks: number of allocated indirect blocks
* @blks: number of allocated direct blocks
+ * @goal: preferred place for allocation
* @offsets: offsets (in the blocks) to store the pointers to next.
* @branch: place to store the chain in.
*
@@ -824,6 +843,7 @@ failed:
/**
* ext4_splice_branch - splice the allocated branch onto inode.
+ * @handle: handle for this transaction
* @inode: owner
* @block: (logical) number of block we are adding
* @chain: chain of indirect blocks (with a missing link - see
@@ -1079,7 +1099,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode,
* Calculate the number of metadata blocks need to reserve
* to allocate a block located at @lblock
*/
-static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
+static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
{
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
return ext4_ext_calc_metadata_amount(inode, lblock);
@@ -1318,7 +1338,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
* avoid double accounting
*/
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
- EXT4_I(inode)->i_delalloc_reserved_flag = 1;
+ ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
/*
* We need to check for EXT4 here because migrate
* could have changed the inode type in between
@@ -1348,7 +1368,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
ext4_da_update_reserve_space(inode, retval, 1);
}
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
- EXT4_I(inode)->i_delalloc_reserved_flag = 0;
+ ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
@@ -1876,7 +1896,7 @@ static int ext4_journalled_write_end(struct file *file,
/*
* Reserve a single block located at lblock
*/
-static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
+static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
{
int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2123,9 +2143,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
*/
if (unlikely(journal_data && PageChecked(page)))
err = __ext4_journalled_writepage(page, len);
- else
+ else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
err = ext4_bio_write_page(&io_submit, page,
len, mpd->wbc);
+ else
+ err = block_write_full_page(page,
+ noalloc_get_block_write, mpd->wbc);
if (!err)
mpd->pages_written++;
@@ -2234,7 +2257,7 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
* affects functions in many different parts of the allocation
* call path. This flag exists primarily because we don't
* want to change *many* call functions, so ext4_map_blocks()
- * will set the magic i_delalloc_reserved_flag once the
+ * will set the EXT4_STATE_DELALLOC_RESERVED flag once the
* inode's allocation semaphore is taken.
*
* If the blocks in questions were delalloc blocks, set
@@ -2718,7 +2741,7 @@ static int ext4_writepage(struct page *page,
* try to create them using __block_write_begin. If this
* fails, redirty the page and move on.
*/
- if (!page_buffers(page)) {
+ if (!page_has_buffers(page)) {
if (__block_write_begin(page, 0, len,
noalloc_get_block_write)) {
redirty_page:
@@ -2732,12 +2755,10 @@ static int ext4_writepage(struct page *page,
if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
ext4_bh_delay_or_unwritten)) {
/*
- * We don't want to do block allocation So redirty the
- * page and return We may reach here when we do a
- * journal commit via
- * journal_submit_inode_data_buffers. If we don't
- * have mapping block we just ignore them. We can also
- * reach here via shrink_page_list
+ * We don't want to do block allocation, so redirty
+ * the page and return. We may reach here when we do
+ * a journal commit via journal_submit_inode_data_buffers.
+ * We can also reach here via shrink_page_list
*/
goto redirty_page;
}
@@ -3359,7 +3380,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
* doing I/O at all.
*
* We could call write_cache_pages(), and then redirty all of
- * the pages by calling redirty_page_for_writeback() but that
+ * the pages by calling redirty_page_for_writepage() but that
* would be ugly in the extreme. So instead we would need to
* replicate parts of the code in the above functions,
* simplifying them becuase we wouldn't actually intend to
@@ -3717,8 +3738,7 @@ static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
retry:
io_end = ext4_init_io_end(inode, GFP_ATOMIC);
if (!io_end) {
- if (printk_ratelimit())
- printk(KERN_WARNING "%s: allocation fail\n", __func__);
+ pr_warn_ratelimited("%s: allocation fail\n", __func__);
schedule();
goto retry;
}
@@ -3742,9 +3762,9 @@ retry:
* preallocated extents, and those write extend the file, no need to
* fall back to buffered IO.
*
- * For holes, we fallocate those blocks, mark them as unintialized
+ * For holes, we fallocate those blocks, mark them as uninitialized
* If those blocks were preallocated, we mark sure they are splited, but
- * still keep the range to write as unintialized.
+ * still keep the range to write as uninitialized.
*
* The unwrritten extents will be converted to written when DIO is completed.
* For async direct IO, since the IO may still pending when return, we
@@ -4042,7 +4062,7 @@ int ext4_block_truncate_page(handle_t *handle,
if (ext4_should_journal_data(inode)) {
err = ext4_handle_dirty_metadata(handle, inode, bh);
} else {
- if (ext4_should_order_data(inode))
+ if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode)
err = ext4_jbd2_file_inode(handle, inode);
mark_buffer_dirty(bh);
}
@@ -4166,6 +4186,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
{
__le32 *p;
int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED;
+ int err;
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
flags |= EXT4_FREE_BLOCKS_METADATA;
@@ -4181,11 +4202,23 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
if (try_to_extend_transaction(handle, inode)) {
if (bh) {
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- ext4_handle_dirty_metadata(handle, inode, bh);
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
+ if (unlikely(err)) {
+ ext4_std_error(inode->i_sb, err);
+ return 1;
+ }
+ }
+ err = ext4_mark_inode_dirty(handle, inode);
+ if (unlikely(err)) {
+ ext4_std_error(inode->i_sb, err);
+ return 1;
+ }
+ err = ext4_truncate_restart_trans(handle, inode,
+ blocks_for_truncate(inode));
+ if (unlikely(err)) {
+ ext4_std_error(inode->i_sb, err);
+ return 1;
}
- ext4_mark_inode_dirty(handle, inode);
- ext4_truncate_restart_trans(handle, inode,
- blocks_for_truncate(inode));
if (bh) {
BUFFER_TRACE(bh, "retaking write access");
ext4_journal_get_write_access(handle, bh);
@@ -4346,6 +4379,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
(__le32 *) bh->b_data,
(__le32 *) bh->b_data + addr_per_block,
depth);
+ brelse(bh);
/*
* Everything below this this pointer has been
@@ -4856,7 +4890,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
}
inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
- ei->i_state_flags = 0;
+ ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
ei->i_dir_start_lookup = 0;
ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
/* We now have enough fields to check if the inode was active or not.
@@ -5115,7 +5149,7 @@ static int ext4_do_update_inode(handle_t *handle,
if (ext4_inode_blocks_set(handle, raw_inode, ei))
goto out_brelse;
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
- raw_inode->i_flags = cpu_to_le32(ei->i_flags);
+ raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
cpu_to_le32(EXT4_OS_HURD))
raw_inode->i_file_acl_high =
@@ -5412,9 +5446,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
* will return the blocks that include the delayed allocation
* blocks for this file.
*/
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks;
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
return 0;
@@ -5651,6 +5683,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
int err, ret;
might_sleep();
+ trace_ext4_mark_inode_dirty(inode, _RET_IP_);
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (ext4_handle_valid(handle) &&
EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index bf5ae88..eb3bc2f 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -331,6 +331,30 @@ mext_out:
return err;
}
+ case FITRIM:
+ {
+ struct super_block *sb = inode->i_sb;
+ struct fstrim_range range;
+ int ret = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (copy_from_user(&range, (struct fstrim_range *)arg,
+ sizeof(range)))
+ return -EFAULT;
+
+ ret = ext4_trim_fs(sb, &range);
+ if (ret < 0)
+ return ret;
+
+ if (copy_to_user((struct fstrim_range *)arg, &range,
+ sizeof(range)))
+ return -EFAULT;
+
+ return 0;
+ }
+
default:
return -ENOTTY;
}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index c58eba34..851f49b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2608,18 +2608,12 @@ int ext4_mb_release(struct super_block *sb)
static inline int ext4_issue_discard(struct super_block *sb,
ext4_group_t block_group, ext4_grpblk_t block, int count)
{
- int ret;
ext4_fsblk_t discard_block;
discard_block = block + ext4_group_first_block_no(sb, block_group);
trace_ext4_discard_blocks(sb,
(unsigned long long) discard_block, count);
- ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
- if (ret == -EOPNOTSUPP) {
- ext4_warning(sb, "discard not supported, disabling");
- clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
- }
- return ret;
+ return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
}
/*
@@ -2631,7 +2625,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
struct super_block *sb = journal->j_private;
struct ext4_buddy e4b;
struct ext4_group_info *db;
- int err, count = 0, count2 = 0;
+ int err, ret, count = 0, count2 = 0;
struct ext4_free_data *entry;
struct list_head *l, *ltmp;
@@ -2641,9 +2635,15 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
entry->count, entry->group, entry);
- if (test_opt(sb, DISCARD))
- ext4_issue_discard(sb, entry->group,
+ if (test_opt(sb, DISCARD)) {
+ ret = ext4_issue_discard(sb, entry->group,
entry->start_blk, entry->count);
+ if (unlikely(ret == -EOPNOTSUPP)) {
+ ext4_warning(sb, "discard not supported, "
+ "disabling");
+ clear_opt(sb, DISCARD);
+ }
+ }
err = ext4_mb_load_buddy(sb, entry->group, &e4b);
/* we expect to find existing buddy because it's pinned */
@@ -3881,19 +3881,6 @@ repeat:
}
}
-/*
- * finds all preallocated spaces and return blocks being freed to them
- * if preallocated space becomes full (no block is used from the space)
- * then the function frees space in buddy
- * XXX: at the moment, truncate (which is the only way to free blocks)
- * discards all preallocations
- */
-static void ext4_mb_return_to_preallocation(struct inode *inode,
- struct ext4_buddy *e4b,
- sector_t block, int count)
-{
- BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list));
-}
#ifdef CONFIG_EXT4_DEBUG
static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
@@ -4283,7 +4270,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
* EDQUOT check, as blocks and quotas have been already
* reserved when data being copied into pagecache.
*/
- if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+ if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
ar->flags |= EXT4_MB_DELALLOC_RESERVED;
else {
/* Without delayed allocation we need to verify
@@ -4380,7 +4367,8 @@ out:
if (inquota && ar->len < inquota)
dquot_free_block(ar->inode, inquota - ar->len);
if (!ar->len) {
- if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+ if (!ext4_test_inode_state(ar->inode,
+ EXT4_STATE_DELALLOC_RESERVED))
/* release all the reserved blocks if non delalloc */
percpu_counter_sub(&sbi->s_dirtyblocks_counter,
reserv_blks);
@@ -4626,7 +4614,11 @@ do_more:
* blocks being freed are metadata. these blocks shouldn't
* be used until this transaction is committed
*/
- new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+ new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+ if (!new_entry) {
+ err = -ENOMEM;
+ goto error_return;
+ }
new_entry->start_blk = bit;
new_entry->group = block_group;
new_entry->count = count;
@@ -4640,12 +4632,9 @@ do_more:
* with group lock held. generate_buddy look at
* them with group lock_held
*/
- if (test_opt(sb, DISCARD))
- ext4_issue_discard(sb, block_group, bit, count);
ext4_lock_group(sb, block_group);
mb_clear_bits(bitmap_bh->b_data, bit, count);
mb_free_blocks(inode, &e4b, bit, count);
- ext4_mb_return_to_preallocation(inode, &e4b, block, count);
}
ret = ext4_free_blks_count(sb, gdp) + count;
@@ -4720,8 +4709,6 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
ext4_unlock_group(sb, group);
ret = ext4_issue_discard(sb, group, start, count);
- if (ret)
- ext4_std_error(sb, ret);
ext4_lock_group(sb, group);
mb_free_blocks(NULL, e4b, start, ex.fe_len);
@@ -4821,6 +4808,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
ext4_group_t group, ngroups = ext4_get_groups_count(sb);
ext4_grpblk_t cnt = 0, first_block, last_block;
uint64_t start, len, minlen, trimmed;
+ ext4_fsblk_t first_data_blk =
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
int ret = 0;
start = range->start >> sb->s_blocksize_bits;
@@ -4830,6 +4819,10 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
return -EINVAL;
+ if (start < first_data_blk) {
+ len -= first_data_blk - start;
+ start = first_data_blk;
+ }
/* Determine first and last group to examine based on start and len */
ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
@@ -4853,7 +4846,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
if (len >= EXT4_BLOCKS_PER_GROUP(sb))
len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block);
else
- last_block = len;
+ last_block = first_block + len;
if (e4b.bd_info->bb_free >= minlen) {
cnt = ext4_trim_all_free(sb, &e4b, first_block,
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 25f3a97..b0a126f 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -496,7 +496,7 @@ int ext4_ext_migrate(struct inode *inode)
goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
- S_IFREG, 0, goal);
+ S_IFREG, NULL, goal);
if (IS_ERR(tmp_inode)) {
retval = -ENOMEM;
ext4_journal_stop(handle);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 92203b8..5485390 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -581,9 +581,9 @@ static int htree_dirblock_to_tree(struct file *dir_file,
dir->i_sb->s_blocksize -
EXT4_DIR_REC_LEN(0));
for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
- if (!ext4_check_dir_entry(dir, de, bh,
- (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
- +((char *)de - bh->b_data))) {
+ if (ext4_check_dir_entry(dir, NULL, de, bh,
+ (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
+ + ((char *)de - bh->b_data))) {
/* On error, skip the f_pos to the next block. */
dir_file->f_pos = (dir_file->f_pos |
(dir->i_sb->s_blocksize - 1)) + 1;
@@ -820,7 +820,7 @@ static inline int search_dirblock(struct buffer_head *bh,
if ((char *) de + namelen <= dlimit &&
ext4_match (namelen, name, de)) {
/* found a match - just to be sure, do a full check */
- if (!ext4_check_dir_entry(dir, de, bh, offset))
+ if (ext4_check_dir_entry(dir, NULL, de, bh, offset))
return -1;
*res_dir = de;
return 1;
@@ -872,7 +872,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
if (namelen > EXT4_NAME_LEN)
return NULL;
if ((namelen <= 2) && (name[0] == '.') &&
- (name[1] == '.' || name[1] == '0')) {
+ (name[1] == '.' || name[1] == '\0')) {
/*
* "." or ".." will only be in the first block
* NFS may look up ".."; "." should be handled by the VFS
@@ -1036,7 +1036,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
return ERR_PTR(-EIO);
}
inode = ext4_iget(dir->i_sb, ino);
- if (unlikely(IS_ERR(inode))) {
+ if (IS_ERR(inode)) {
if (PTR_ERR(inode) == -ESTALE) {
EXT4_ERROR_INODE(dir,
"deleted inode referenced: %u",
@@ -1269,7 +1269,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
de = (struct ext4_dir_entry_2 *)bh->b_data;
top = bh->b_data + blocksize - reclen;
while ((char *) de <= top) {
- if (!ext4_check_dir_entry(dir, de, bh, offset))
+ if (ext4_check_dir_entry(dir, NULL, de, bh, offset))
return -EIO;
if (ext4_match(namelen, name, de))
return -EEXIST;
@@ -1602,7 +1602,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
if (err)
goto journal_error;
}
- ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
+ err = ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
+ if (err) {
+ ext4_std_error(inode->i_sb, err);
+ goto cleanup;
+ }
}
de = do_split(handle, dir, &bh, frame, &hinfo, &err);
if (!de)
@@ -1630,17 +1634,21 @@ static int ext4_delete_entry(handle_t *handle,
{
struct ext4_dir_entry_2 *de, *pde;
unsigned int blocksize = dir->i_sb->s_blocksize;
- int i;
+ int i, err;
i = 0;
pde = NULL;
de = (struct ext4_dir_entry_2 *) bh->b_data;
while (i < bh->b_size) {
- if (!ext4_check_dir_entry(dir, de, bh, i))
+ if (ext4_check_dir_entry(dir, NULL, de, bh, i))
return -EIO;
if (de == de_del) {
BUFFER_TRACE(bh, "get_write_access");
- ext4_journal_get_write_access(handle, bh);
+ err = ext4_journal_get_write_access(handle, bh);
+ if (unlikely(err)) {
+ ext4_std_error(dir->i_sb, err);
+ return err;
+ }
if (pde)
pde->rec_len = ext4_rec_len_to_disk(
ext4_rec_len_from_disk(pde->rec_len,
@@ -1652,7 +1660,11 @@ static int ext4_delete_entry(handle_t *handle,
de->inode = 0;
dir->i_version++;
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- ext4_handle_dirty_metadata(handle, dir, bh);
+ err = ext4_handle_dirty_metadata(handle, dir, bh);
+ if (unlikely(err)) {
+ ext4_std_error(dir->i_sb, err);
+ return err;
+ }
return 0;
}
i += ext4_rec_len_from_disk(de->rec_len, blocksize);
@@ -1789,7 +1801,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
handle_t *handle;
struct inode *inode;
- struct buffer_head *dir_block;
+ struct buffer_head *dir_block = NULL;
struct ext4_dir_entry_2 *de;
unsigned int blocksize = dir->i_sb->s_blocksize;
int err, retries = 0;
@@ -1822,7 +1834,9 @@ retry:
if (!dir_block)
goto out_clear_inode;
BUFFER_TRACE(dir_block, "get_write_access");
- ext4_journal_get_write_access(handle, dir_block);
+ err = ext4_journal_get_write_access(handle, dir_block);
+ if (err)
+ goto out_clear_inode;
de = (struct ext4_dir_entry_2 *) dir_block->b_data;
de->inode = cpu_to_le32(inode->i_ino);
de->name_len = 1;
@@ -1839,10 +1853,12 @@ retry:
ext4_set_de_type(dir->i_sb, de, S_IFDIR);
inode->i_nlink = 2;
BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
- ext4_handle_dirty_metadata(handle, dir, dir_block);
- brelse(dir_block);
- ext4_mark_inode_dirty(handle, inode);
- err = ext4_add_entry(handle, dentry, inode);
+ err = ext4_handle_dirty_metadata(handle, dir, dir_block);
+ if (err)
+ goto out_clear_inode;
+ err = ext4_mark_inode_dirty(handle, inode);
+ if (!err)
+ err = ext4_add_entry(handle, dentry, inode);
if (err) {
out_clear_inode:
clear_nlink(inode);
@@ -1853,10 +1869,13 @@ out_clear_inode:
}
ext4_inc_count(handle, dir);
ext4_update_dx_flag(dir);
- ext4_mark_inode_dirty(handle, dir);
+ err = ext4_mark_inode_dirty(handle, dir);
+ if (err)
+ goto out_clear_inode;
d_instantiate(dentry, inode);
unlock_new_inode(inode);
out_stop:
+ brelse(dir_block);
ext4_journal_stop(handle);
if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
goto retry;
@@ -1919,7 +1938,7 @@ static int empty_dir(struct inode *inode)
}
de = (struct ext4_dir_entry_2 *) bh->b_data;
}
- if (!ext4_check_dir_entry(inode, de, bh, offset)) {
+ if (ext4_check_dir_entry(inode, NULL, de, bh, offset)) {
de = (struct ext4_dir_entry_2 *)(bh->b_data +
sb->s_blocksize);
offset = (offset | (sb->s_blocksize - 1)) + 1;
@@ -2407,7 +2426,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
ext4_current_time(new_dir);
ext4_mark_inode_dirty(handle, new_dir);
BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata");
- ext4_handle_dirty_metadata(handle, new_dir, new_bh);
+ retval = ext4_handle_dirty_metadata(handle, new_dir, new_bh);
+ if (unlikely(retval)) {
+ ext4_std_error(new_dir->i_sb, retval);
+ goto end_rename;
+ }
brelse(new_bh);
new_bh = NULL;
}
@@ -2459,7 +2482,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
cpu_to_le32(new_dir->i_ino);
BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
- ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
+ retval = ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
+ if (retval) {
+ ext4_std_error(old_dir->i_sb, retval);
+ goto end_rename;
+ }
ext4_dec_count(handle, old_dir);
if (new_inode) {
/* checked empty_dir above, can't have another parent,
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 46a7d6a..7270dcf 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -32,16 +32,24 @@
static struct kmem_cache *io_page_cachep, *io_end_cachep;
+#define WQ_HASH_SZ 37
+#define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
+static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
+
int __init ext4_init_pageio(void)
{
+ int i;
+
io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
if (io_page_cachep == NULL)
return -ENOMEM;
io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
- if (io_page_cachep == NULL) {
+ if (io_end_cachep == NULL) {
kmem_cache_destroy(io_page_cachep);
return -ENOMEM;
}
+ for (i = 0; i < WQ_HASH_SZ; i++)
+ init_waitqueue_head(&ioend_wq[i]);
return 0;
}
@@ -52,24 +60,37 @@ void ext4_exit_pageio(void)
kmem_cache_destroy(io_page_cachep);
}
+void ext4_ioend_wait(struct inode *inode)
+{
+ wait_queue_head_t *wq = to_ioend_wq(inode);
+
+ wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
+}
+
+static void put_io_page(struct ext4_io_page *io_page)
+{
+ if (atomic_dec_and_test(&io_page->p_count)) {
+ end_page_writeback(io_page->p_page);
+ put_page(io_page->p_page);
+ kmem_cache_free(io_page_cachep, io_page);
+ }
+}
+
void ext4_free_io_end(ext4_io_end_t *io)
{
int i;
+ wait_queue_head_t *wq;
BUG_ON(!io);
if (io->page)
put_page(io->page);
- for (i = 0; i < io->num_io_pages; i++) {
- if (--io->pages[i]->p_count == 0) {
- struct page *page = io->pages[i]->p_page;
-
- end_page_writeback(page);
- put_page(page);
- kmem_cache_free(io_page_cachep, io->pages[i]);
- }
- }
+ for (i = 0; i < io->num_io_pages; i++)
+ put_io_page(io->pages[i]);
io->num_io_pages = 0;
- iput(io->inode);
+ wq = to_ioend_wq(io->inode);
+ if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
+ waitqueue_active(wq))
+ wake_up_all(wq);
kmem_cache_free(io_end_cachep, io);
}
@@ -137,13 +158,10 @@ static void ext4_end_io_work(struct work_struct *work)
ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
{
- ext4_io_end_t *io = NULL;
-
- io = kmem_cache_alloc(io_end_cachep, flags);
+ ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
if (io) {
- memset(io, 0, sizeof(*io));
- io->inode = igrab(inode);
- BUG_ON(!io->inode);
+ atomic_inc(&EXT4_I(inode)->i_ioend_count);
+ io->inode = inode;
INIT_WORK(&io->work, ext4_end_io_work);
INIT_LIST_HEAD(&io->list);
}
@@ -171,35 +189,15 @@ static void ext4_end_bio(struct bio *bio, int error)
struct workqueue_struct *wq;
struct inode *inode;
unsigned long flags;
- ext4_fsblk_t err_block;
int i;
BUG_ON(!io_end);
- inode = io_end->inode;
bio->bi_private = NULL;
bio->bi_end_io = NULL;
if (test_bit(BIO_UPTODATE, &bio->bi_flags))
error = 0;
- err_block = bio->bi_sector >> (inode->i_blkbits - 9);
bio_put(bio);
- if (!(inode->i_sb->s_flags & MS_ACTIVE)) {
- pr_err("sb umounted, discard end_io request for inode %lu\n",
- io_end->inode->i_ino);
- ext4_free_io_end(io_end);
- return;
- }
-
- if (error) {
- io_end->flag |= EXT4_IO_END_ERROR;
- ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
- "(offset %llu size %ld starting block %llu)",
- inode->i_ino,
- (unsigned long long) io_end->offset,
- (long) io_end->size,
- (unsigned long long) err_block);
- }
-
for (i = 0; i < io_end->num_io_pages; i++) {
struct page *page = io_end->pages[i]->p_page;
struct buffer_head *bh, *head;
@@ -236,14 +234,6 @@ static void ext4_end_bio(struct bio *bio, int error)
} while (bh != head);
}
- if (--io_end->pages[i]->p_count == 0) {
- struct page *page = io_end->pages[i]->p_page;
-
- end_page_writeback(page);
- put_page(page);
- kmem_cache_free(io_page_cachep, io_end->pages[i]);
- }
-
/*
* If this is a partial write which happened to make
* all buffers uptodate then we can optimize away a
@@ -253,9 +243,22 @@ static void ext4_end_bio(struct bio *bio, int error)
*/
if (!partial_write)
SetPageUptodate(page);
- }
+ put_io_page(io_end->pages[i]);
+ }
io_end->num_io_pages = 0;
+ inode = io_end->inode;
+
+ if (error) {
+ io_end->flag |= EXT4_IO_END_ERROR;
+ ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
+ "(offset %llu size %ld starting block %llu)",
+ inode->i_ino,
+ (unsigned long long) io_end->offset,
+ (long) io_end->size,
+ (unsigned long long)
+ bio->bi_sector >> (inode->i_blkbits - 9));
+ }
/* Add the io_end to per-inode completed io list*/
spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
@@ -305,7 +308,6 @@ static int io_submit_init(struct ext4_io_submit *io,
bio->bi_private = io->io_end = io_end;
bio->bi_end_io = ext4_end_bio;
- io_end->inode = inode;
io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
io->io_bio = bio;
@@ -360,7 +362,7 @@ submit_and_retry:
if ((io_end->num_io_pages == 0) ||
(io_end->pages[io_end->num_io_pages-1] != io_page)) {
io_end->pages[io_end->num_io_pages++] = io_page;
- io_page->p_count++;
+ atomic_inc(&io_page->p_count);
}
return 0;
}
@@ -389,7 +391,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
return -ENOMEM;
}
io_page->p_page = page;
- io_page->p_count = 0;
+ atomic_set(&io_page->p_count, 1);
get_page(page);
for (bh = head = page_buffers(page), block_start = 0;
@@ -421,10 +423,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
* PageWriteback bit from the page to prevent the system from
* wedging later on.
*/
- if (io_page->p_count == 0) {
- put_page(page);
- end_page_writeback(page);
- kmem_cache_free(io_page_cachep, io_page);
- }
+ put_io_page(io_page);
return ret;
}
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index dc96392..3ecc6e4 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -220,7 +220,11 @@ static int setup_new_group_blocks(struct super_block *sb,
memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
set_buffer_uptodate(gdb);
unlock_buffer(gdb);
- ext4_handle_dirty_metadata(handle, NULL, gdb);
+ err = ext4_handle_dirty_metadata(handle, NULL, gdb);
+ if (unlikely(err)) {
+ brelse(gdb);
+ goto exit_bh;
+ }
ext4_set_bit(bit, bh->b_data);
brelse(gdb);
}
@@ -232,6 +236,8 @@ static int setup_new_group_blocks(struct super_block *sb,
GFP_NOFS);
if (err)
goto exit_bh;
+ for (i = 0, bit = gdblocks + 1; i < reserved_gdb; i++, bit++)
+ ext4_set_bit(bit, bh->b_data);
ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
input->block_bitmap - start);
@@ -247,13 +253,20 @@ static int setup_new_group_blocks(struct super_block *sb,
err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
if (err)
goto exit_bh;
+ for (i = 0, bit = input->inode_table - start;
+ i < sbi->s_itb_per_group; i++, bit++)
+ ext4_set_bit(bit, bh->b_data);
if ((err = extend_or_restart_transaction(handle, 2, bh)))
goto exit_bh;
ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8,
bh->b_data);
- ext4_handle_dirty_metadata(handle, NULL, bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (unlikely(err)) {
+ ext4_std_error(sb, err);
+ goto exit_bh;
+ }
brelse(bh);
/* Mark unused entries in inode bitmap used */
ext4_debug("clear inode bitmap %#04llx (+%llu)\n",
@@ -265,7 +278,9 @@ static int setup_new_group_blocks(struct super_block *sb,
ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
bh->b_data);
- ext4_handle_dirty_metadata(handle, NULL, bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (unlikely(err))
+ ext4_std_error(sb, err);
exit_bh:
brelse(bh);
@@ -417,17 +432,21 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
goto exit_dind;
}
- if ((err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh)))
+ err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
+ if (unlikely(err))
goto exit_dind;
- if ((err = ext4_journal_get_write_access(handle, *primary)))
+ err = ext4_journal_get_write_access(handle, *primary);
+ if (unlikely(err))
goto exit_sbh;
- if ((err = ext4_journal_get_write_access(handle, dind)))
- goto exit_primary;
+ err = ext4_journal_get_write_access(handle, dind);
+ if (unlikely(err))
+ ext4_std_error(sb, err);
/* ext4_reserve_inode_write() gets a reference on the iloc */
- if ((err = ext4_reserve_inode_write(handle, inode, &iloc)))
+ err = ext4_reserve_inode_write(handle, inode, &iloc);
+ if (unlikely(err))
goto exit_dindj;
n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
@@ -449,12 +468,20 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
* reserved inode, and will become GDT blocks (primary and backup).
*/
data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0;
- ext4_handle_dirty_metadata(handle, NULL, dind);
- brelse(dind);
+ err = ext4_handle_dirty_metadata(handle, NULL, dind);
+ if (unlikely(err)) {
+ ext4_std_error(sb, err);
+ goto exit_inode;
+ }
inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
ext4_mark_iloc_dirty(handle, inode, &iloc);
memset((*primary)->b_data, 0, sb->s_blocksize);
- ext4_handle_dirty_metadata(handle, NULL, *primary);
+ err = ext4_handle_dirty_metadata(handle, NULL, *primary);
+ if (unlikely(err)) {
+ ext4_std_error(sb, err);
+ goto exit_inode;
+ }
+ brelse(dind);
o_group_desc = EXT4_SB(sb)->s_group_desc;
memcpy(n_group_desc, o_group_desc,
@@ -465,19 +492,19 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
kfree(o_group_desc);
le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
- ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
+ err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
+ if (err)
+ ext4_std_error(sb, err);
- return 0;
+ return err;
exit_inode:
/* ext4_journal_release_buffer(handle, iloc.bh); */
brelse(iloc.bh);
exit_dindj:
/* ext4_journal_release_buffer(handle, dind); */
-exit_primary:
- /* ext4_journal_release_buffer(handle, *primary); */
exit_sbh:
- /* ext4_journal_release_buffer(handle, *primary); */
+ /* ext4_journal_release_buffer(handle, EXT4_SB(sb)->s_sbh); */
exit_dind:
brelse(dind);
exit_bh:
@@ -660,7 +687,9 @@ static void update_backups(struct super_block *sb,
memset(bh->b_data + size, 0, rest);
set_buffer_uptodate(bh);
unlock_buffer(bh);
- ext4_handle_dirty_metadata(handle, NULL, bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (unlikely(err))
+ ext4_std_error(sb, err);
brelse(bh);
}
if ((err2 = ext4_journal_stop(handle)) && !err)
@@ -878,7 +907,11 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
/* Update the global fs size fields */
sbi->s_groups_count++;
- ext4_handle_dirty_metadata(handle, NULL, primary);
+ err = ext4_handle_dirty_metadata(handle, NULL, primary);
+ if (unlikely(err)) {
+ ext4_std_error(sb, err);
+ goto exit_journal;
+ }
/* Update the reserved block counts only once the new group is
* active. */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0348ce0..29c80f6 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -73,8 +73,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
static void ext4_write_super(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
-static int ext4_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt);
+static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data);
static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
@@ -82,7 +82,7 @@ static void ext4_unregister_li_request(struct super_block *sb);
static struct file_system_type ext3_fs_type = {
.owner = THIS_MODULE,
.name = "ext3",
- .get_sb = ext4_get_sb,
+ .mount = ext4_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
@@ -388,13 +388,14 @@ static void ext4_handle_error(struct super_block *sb)
void __ext4_error(struct super_block *sb, const char *function,
unsigned int line, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: ",
- sb->s_id, function, line, current->comm);
- vprintk(fmt, args);
- printk("\n");
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
+ sb->s_id, function, line, current->comm, &vaf);
va_end(args);
ext4_handle_error(sb);
@@ -405,28 +406,31 @@ void ext4_error_inode(struct inode *inode, const char *function,
const char *fmt, ...)
{
va_list args;
+ struct va_format vaf;
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
es->s_last_error_ino = cpu_to_le32(inode->i_ino);
es->s_last_error_block = cpu_to_le64(block);
save_error_info(inode->i_sb, function, line);
va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
inode->i_sb->s_id, function, line, inode->i_ino);
if (block)
- printk("block %llu: ", block);
- printk("comm %s: ", current->comm);
- vprintk(fmt, args);
- printk("\n");
+ printk(KERN_CONT "block %llu: ", block);
+ printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf);
va_end(args);
ext4_handle_error(inode->i_sb);
}
void ext4_error_file(struct file *file, const char *function,
- unsigned int line, const char *fmt, ...)
+ unsigned int line, ext4_fsblk_t block,
+ const char *fmt, ...)
{
va_list args;
+ struct va_format vaf;
struct ext4_super_block *es;
struct inode *inode = file->f_dentry->d_inode;
char pathname[80], *path;
@@ -434,17 +438,18 @@ void ext4_error_file(struct file *file, const char *function,
es = EXT4_SB(inode->i_sb)->s_es;
es->s_last_error_ino = cpu_to_le32(inode->i_ino);
save_error_info(inode->i_sb, function, line);
- va_start(args, fmt);
path = d_path(&(file->f_path), pathname, sizeof(pathname));
- if (!path)
+ if (IS_ERR(path))
path = "(unknown)";
printk(KERN_CRIT
- "EXT4-fs error (device %s): %s:%d: inode #%lu "
- "(comm %s path %s): ",
- inode->i_sb->s_id, function, line, inode->i_ino,
- current->comm, path);
- vprintk(fmt, args);
- printk("\n");
+ "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
+ inode->i_sb->s_id, function, line, inode->i_ino);
+ if (block)
+ printk(KERN_CONT "block %llu: ", block);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf);
va_end(args);
ext4_handle_error(inode->i_sb);
@@ -543,28 +548,29 @@ void __ext4_abort(struct super_block *sb, const char *function,
panic("EXT4-fs panic from previous error\n");
}
-void ext4_msg (struct super_block * sb, const char *prefix,
- const char *fmt, ...)
+void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk("%sEXT4-fs (%s): ", prefix, sb->s_id);
- vprintk(fmt, args);
- printk("\n");
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
va_end(args);
}
void __ext4_warning(struct super_block *sb, const char *function,
unsigned int line, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: ",
- sb->s_id, function, line);
- vprintk(fmt, args);
- printk("\n");
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
+ sb->s_id, function, line, &vaf);
va_end(args);
}
@@ -575,21 +581,25 @@ void __ext4_grp_locked_error(const char *function, unsigned int line,
__releases(bitlock)
__acquires(bitlock)
{
+ struct va_format vaf;
va_list args;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
es->s_last_error_ino = cpu_to_le32(ino);
es->s_last_error_block = cpu_to_le64(block);
__save_error_info(sb, function, line);
+
va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
sb->s_id, function, line, grp);
if (ino)
- printk("inode %lu: ", ino);
+ printk(KERN_CONT "inode %lu: ", ino);
if (block)
- printk("block %llu:", (unsigned long long) block);
- vprintk(fmt, args);
- printk("\n");
+ printk(KERN_CONT "block %llu:", (unsigned long long) block);
+ printk(KERN_CONT "%pV\n", &vaf);
va_end(args);
if (test_opt(sb, ERRORS_CONT)) {
@@ -808,32 +818,43 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
INIT_LIST_HEAD(&ei->i_prealloc_list);
spin_lock_init(&ei->i_prealloc_lock);
- /*
- * Note: We can be called before EXT4_SB(sb)->s_journal is set,
- * therefore it can be null here. Don't check it, just initialize
- * jinode.
- */
- jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
ei->i_reserved_data_blocks = 0;
ei->i_reserved_meta_blocks = 0;
ei->i_allocated_meta_blocks = 0;
ei->i_da_metadata_calc_len = 0;
- ei->i_delalloc_reserved_flag = 0;
spin_lock_init(&(ei->i_block_reservation_lock));
#ifdef CONFIG_QUOTA
ei->i_reserved_quota = 0;
#endif
+ ei->jinode = NULL;
INIT_LIST_HEAD(&ei->i_completed_io_list);
spin_lock_init(&ei->i_completed_io_lock);
ei->cur_aio_dio = NULL;
ei->i_sync_tid = 0;
ei->i_datasync_tid = 0;
+ atomic_set(&ei->i_ioend_count, 0);
return &ei->vfs_inode;
}
+static int ext4_drop_inode(struct inode *inode)
+{
+ int drop = generic_drop_inode(inode);
+
+ trace_ext4_drop_inode(inode, drop);
+ return drop;
+}
+
+static void ext4_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
+}
+
static void ext4_destroy_inode(struct inode *inode)
{
+ ext4_ioend_wait(inode);
if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
ext4_msg(inode->i_sb, KERN_ERR,
"Inode %lu (%p): orphan list check failed!",
@@ -843,7 +864,7 @@ static void ext4_destroy_inode(struct inode *inode)
true);
dump_stack();
}
- kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
+ call_rcu(&inode->i_rcu, ext4_i_callback);
}
static void init_once(void *foo)
@@ -881,9 +902,12 @@ void ext4_clear_inode(struct inode *inode)
end_writeback(inode);
dquot_drop(inode);
ext4_discard_preallocations(inode);
- if (EXT4_JOURNAL(inode))
- jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
- &EXT4_I(inode)->jinode);
+ if (EXT4_I(inode)->jinode) {
+ jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
+ EXT4_I(inode)->jinode);
+ jbd2_free_inode(EXT4_I(inode)->jinode);
+ EXT4_I(inode)->jinode = NULL;
+ }
}
static inline void ext4_show_quota_options(struct seq_file *seq,
@@ -1016,6 +1040,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
!(def_mount_opts & EXT4_DEFM_NODELALLOC))
seq_puts(seq, ",nodelalloc");
+ if (test_opt(sb, MBLK_IO_SUBMIT))
+ seq_puts(seq, ",mblk_io_submit");
if (sbi->s_stripe)
seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
/*
@@ -1173,6 +1199,7 @@ static const struct super_operations ext4_sops = {
.destroy_inode = ext4_destroy_inode,
.write_inode = ext4_write_inode,
.dirty_inode = ext4_dirty_inode,
+ .drop_inode = ext4_drop_inode,
.evict_inode = ext4_evict_inode,
.put_super = ext4_put_super,
.sync_fs = ext4_sync_fs,
@@ -1186,7 +1213,6 @@ static const struct super_operations ext4_sops = {
.quota_write = ext4_quota_write,
#endif
.bdev_try_to_free_page = bdev_try_to_free_page,
- .trim_fs = ext4_trim_fs
};
static const struct super_operations ext4_nojournal_sops = {
@@ -1194,6 +1220,7 @@ static const struct super_operations ext4_nojournal_sops = {
.destroy_inode = ext4_destroy_inode,
.write_inode = ext4_write_inode,
.dirty_inode = ext4_dirty_inode,
+ .drop_inode = ext4_drop_inode,
.evict_inode = ext4_evict_inode,
.write_super = ext4_write_super,
.put_super = ext4_put_super,
@@ -1228,8 +1255,8 @@ enum {
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version,
- Opt_stripe, Opt_delalloc, Opt_nodelalloc,
- Opt_block_validity, Opt_noblock_validity,
+ Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
+ Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard,
@@ -1293,6 +1320,8 @@ static const match_table_t tokens = {
{Opt_resize, "resize"},
{Opt_delalloc, "delalloc"},
{Opt_nodelalloc, "nodelalloc"},
+ {Opt_mblk_io_submit, "mblk_io_submit"},
+ {Opt_nomblk_io_submit, "nomblk_io_submit"},
{Opt_block_validity, "block_validity"},
{Opt_noblock_validity, "noblock_validity"},
{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
@@ -1371,7 +1400,7 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
sbi->s_qf_names[qtype] = NULL;
return 0;
}
- set_opt(sbi->s_mount_opt, QUOTA);
+ set_opt(sb, QUOTA);
return 1;
}
@@ -1426,21 +1455,21 @@ static int parse_options(char *options, struct super_block *sb,
switch (token) {
case Opt_bsd_df:
ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- clear_opt(sbi->s_mount_opt, MINIX_DF);
+ clear_opt(sb, MINIX_DF);
break;
case Opt_minix_df:
ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- set_opt(sbi->s_mount_opt, MINIX_DF);
+ set_opt(sb, MINIX_DF);
break;
case Opt_grpid:
ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- set_opt(sbi->s_mount_opt, GRPID);
+ set_opt(sb, GRPID);
break;
case Opt_nogrpid:
ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- clear_opt(sbi->s_mount_opt, GRPID);
+ clear_opt(sb, GRPID);
break;
case Opt_resuid:
@@ -1458,38 +1487,38 @@ static int parse_options(char *options, struct super_block *sb,
/* *sb_block = match_int(&args[0]); */
break;
case Opt_err_panic:
- clear_opt(sbi->s_mount_opt, ERRORS_CONT);
- clear_opt(sbi->s_mount_opt, ERRORS_RO);
- set_opt(sbi->s_mount_opt, ERRORS_PANIC);
+ clear_opt(sb, ERRORS_CONT);
+ clear_opt(sb, ERRORS_RO);
+ set_opt(sb, ERRORS_PANIC);
break;
case Opt_err_ro:
- clear_opt(sbi->s_mount_opt, ERRORS_CONT);
- clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
- set_opt(sbi->s_mount_opt, ERRORS_RO);
+ clear_opt(sb, ERRORS_CONT);
+ clear_opt(sb, ERRORS_PANIC);
+ set_opt(sb, ERRORS_RO);
break;
case Opt_err_cont:
- clear_opt(sbi->s_mount_opt, ERRORS_RO);
- clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
- set_opt(sbi->s_mount_opt, ERRORS_CONT);
+ clear_opt(sb, ERRORS_RO);
+ clear_opt(sb, ERRORS_PANIC);
+ set_opt(sb, ERRORS_CONT);
break;
case Opt_nouid32:
- set_opt(sbi->s_mount_opt, NO_UID32);
+ set_opt(sb, NO_UID32);
break;
case Opt_debug:
- set_opt(sbi->s_mount_opt, DEBUG);
+ set_opt(sb, DEBUG);
break;
case Opt_oldalloc:
- set_opt(sbi->s_mount_opt, OLDALLOC);
+ set_opt(sb, OLDALLOC);
break;
case Opt_orlov:
- clear_opt(sbi->s_mount_opt, OLDALLOC);
+ clear_opt(sb, OLDALLOC);
break;
#ifdef CONFIG_EXT4_FS_XATTR
case Opt_user_xattr:
- set_opt(sbi->s_mount_opt, XATTR_USER);
+ set_opt(sb, XATTR_USER);
break;
case Opt_nouser_xattr:
- clear_opt(sbi->s_mount_opt, XATTR_USER);
+ clear_opt(sb, XATTR_USER);
break;
#else
case Opt_user_xattr:
@@ -1499,10 +1528,10 @@ static int parse_options(char *options, struct super_block *sb,
#endif
#ifdef CONFIG_EXT4_FS_POSIX_ACL
case Opt_acl:
- set_opt(sbi->s_mount_opt, POSIX_ACL);
+ set_opt(sb, POSIX_ACL);
break;
case Opt_noacl:
- clear_opt(sbi->s_mount_opt, POSIX_ACL);
+ clear_opt(sb, POSIX_ACL);
break;
#else
case Opt_acl:
@@ -1521,7 +1550,7 @@ static int parse_options(char *options, struct super_block *sb,
"Cannot specify journal on remount");
return 0;
}
- set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
+ set_opt(sb, UPDATE_JOURNAL);
break;
case Opt_journal_dev:
if (is_remount) {
@@ -1534,14 +1563,14 @@ static int parse_options(char *options, struct super_block *sb,
*journal_devnum = option;
break;
case Opt_journal_checksum:
- set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
+ set_opt(sb, JOURNAL_CHECKSUM);
break;
case Opt_journal_async_commit:
- set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
- set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
+ set_opt(sb, JOURNAL_ASYNC_COMMIT);
+ set_opt(sb, JOURNAL_CHECKSUM);
break;
case Opt_noload:
- set_opt(sbi->s_mount_opt, NOLOAD);
+ set_opt(sb, NOLOAD);
break;
case Opt_commit:
if (match_int(&args[0], &option))
@@ -1584,15 +1613,15 @@ static int parse_options(char *options, struct super_block *sb,
return 0;
}
} else {
- clear_opt(sbi->s_mount_opt, DATA_FLAGS);
+ clear_opt(sb, DATA_FLAGS);
sbi->s_mount_opt |= data_opt;
}
break;
case Opt_data_err_abort:
- set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+ set_opt(sb, DATA_ERR_ABORT);
break;
case Opt_data_err_ignore:
- clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+ clear_opt(sb, DATA_ERR_ABORT);
break;
#ifdef CONFIG_QUOTA
case Opt_usrjquota:
@@ -1632,12 +1661,12 @@ set_qf_format:
break;
case Opt_quota:
case Opt_usrquota:
- set_opt(sbi->s_mount_opt, QUOTA);
- set_opt(sbi->s_mount_opt, USRQUOTA);
+ set_opt(sb, QUOTA);
+ set_opt(sb, USRQUOTA);
break;
case Opt_grpquota:
- set_opt(sbi->s_mount_opt, QUOTA);
- set_opt(sbi->s_mount_opt, GRPQUOTA);
+ set_opt(sb, QUOTA);
+ set_opt(sb, GRPQUOTA);
break;
case Opt_noquota:
if (sb_any_quota_loaded(sb)) {
@@ -1645,9 +1674,9 @@ set_qf_format:
"options when quota turned on");
return 0;
}
- clear_opt(sbi->s_mount_opt, QUOTA);
- clear_opt(sbi->s_mount_opt, USRQUOTA);
- clear_opt(sbi->s_mount_opt, GRPQUOTA);
+ clear_opt(sb, QUOTA);
+ clear_opt(sb, USRQUOTA);
+ clear_opt(sb, GRPQUOTA);
break;
#else
case Opt_quota:
@@ -1673,7 +1702,7 @@ set_qf_format:
sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
break;
case Opt_nobarrier:
- clear_opt(sbi->s_mount_opt, BARRIER);
+ clear_opt(sb, BARRIER);
break;
case Opt_barrier:
if (args[0].from) {
@@ -1682,9 +1711,9 @@ set_qf_format:
} else
option = 1; /* No argument, default to 1 */
if (option)
- set_opt(sbi->s_mount_opt, BARRIER);
+ set_opt(sb, BARRIER);
else
- clear_opt(sbi->s_mount_opt, BARRIER);
+ clear_opt(sb, BARRIER);
break;
case Opt_ignore:
break;
@@ -1708,11 +1737,17 @@ set_qf_format:
"Ignoring deprecated bh option");
break;
case Opt_i_version:
- set_opt(sbi->s_mount_opt, I_VERSION);
+ set_opt(sb, I_VERSION);
sb->s_flags |= MS_I_VERSION;
break;
case Opt_nodelalloc:
- clear_opt(sbi->s_mount_opt, DELALLOC);
+ clear_opt(sb, DELALLOC);
+ break;
+ case Opt_mblk_io_submit:
+ set_opt(sb, MBLK_IO_SUBMIT);
+ break;
+ case Opt_nomblk_io_submit:
+ clear_opt(sb, MBLK_IO_SUBMIT);
break;
case Opt_stripe:
if (match_int(&args[0], &option))
@@ -1722,13 +1757,13 @@ set_qf_format:
sbi->s_stripe = option;
break;
case Opt_delalloc:
- set_opt(sbi->s_mount_opt, DELALLOC);
+ set_opt(sb, DELALLOC);
break;
case Opt_block_validity:
- set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+ set_opt(sb, BLOCK_VALIDITY);
break;
case Opt_noblock_validity:
- clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+ clear_opt(sb, BLOCK_VALIDITY);
break;
case Opt_inode_readahead_blks:
if (match_int(&args[0], &option))
@@ -1752,7 +1787,7 @@ set_qf_format:
option);
break;
case Opt_noauto_da_alloc:
- set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
+ set_opt(sb, NO_AUTO_DA_ALLOC);
break;
case Opt_auto_da_alloc:
if (args[0].from) {
@@ -1761,24 +1796,24 @@ set_qf_format:
} else
option = 1; /* No argument, default to 1 */
if (option)
- clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
+ clear_opt(sb, NO_AUTO_DA_ALLOC);
else
- set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
+ set_opt(sb,NO_AUTO_DA_ALLOC);
break;
case Opt_discard:
- set_opt(sbi->s_mount_opt, DISCARD);
+ set_opt(sb, DISCARD);
break;
case Opt_nodiscard:
- clear_opt(sbi->s_mount_opt, DISCARD);
+ clear_opt(sb, DISCARD);
break;
case Opt_dioread_nolock:
- set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+ set_opt(sb, DIOREAD_NOLOCK);
break;
case Opt_dioread_lock:
- clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+ clear_opt(sb, DIOREAD_NOLOCK);
break;
case Opt_init_inode_table:
- set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+ set_opt(sb, INIT_INODE_TABLE);
if (args[0].from) {
if (match_int(&args[0], &option))
return 0;
@@ -1789,7 +1824,7 @@ set_qf_format:
sbi->s_li_wait_mult = option;
break;
case Opt_noinit_inode_table:
- clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+ clear_opt(sb, INIT_INODE_TABLE);
break;
default:
ext4_msg(sb, KERN_ERR,
@@ -1801,10 +1836,10 @@ set_qf_format:
#ifdef CONFIG_QUOTA
if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
- clear_opt(sbi->s_mount_opt, USRQUOTA);
+ clear_opt(sb, USRQUOTA);
if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
- clear_opt(sbi->s_mount_opt, GRPQUOTA);
+ clear_opt(sb, GRPQUOTA);
if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
ext4_msg(sb, KERN_ERR, "old and new quota "
@@ -1874,12 +1909,12 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
ext4_commit_super(sb, 1);
if (test_opt(sb, DEBUG))
printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
- "bpg=%lu, ipg=%lu, mo=%04x]\n",
+ "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
sb->s_blocksize,
sbi->s_groups_count,
EXT4_BLOCKS_PER_GROUP(sb),
EXT4_INODES_PER_GROUP(sb),
- sbi->s_mount_opt);
+ sbi->s_mount_opt, sbi->s_mount_opt2);
return res;
}
@@ -1909,14 +1944,13 @@ static int ext4_fill_flex_info(struct super_block *sb)
size = flex_group_count * sizeof(struct flex_groups);
sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
if (sbi->s_flex_groups == NULL) {
- sbi->s_flex_groups = vmalloc(size);
- if (sbi->s_flex_groups)
- memset(sbi->s_flex_groups, 0, size);
- }
- if (sbi->s_flex_groups == NULL) {
- ext4_msg(sb, KERN_ERR, "not enough memory for "
- "%u flex groups", flex_group_count);
- goto failed;
+ sbi->s_flex_groups = vzalloc(size);
+ if (sbi->s_flex_groups == NULL) {
+ ext4_msg(sb, KERN_ERR,
+ "not enough memory for %u flex groups",
+ flex_group_count);
+ goto failed;
+ }
}
for (i = 0; i < sbi->s_groups_count; i++) {
@@ -2699,7 +2733,6 @@ static int ext4_lazyinit_thread(void *arg)
struct ext4_li_request *elr;
unsigned long next_wakeup;
DEFINE_WAIT(wait);
- int ret;
BUG_ON(NULL == eli);
@@ -2723,13 +2756,12 @@ cont_thread:
elr = list_entry(pos, struct ext4_li_request,
lr_request);
- if (time_after_eq(jiffies, elr->lr_next_sched))
- ret = ext4_run_li_request(elr);
-
- if (ret) {
- ret = 0;
- ext4_remove_li_request(elr);
- continue;
+ if (time_after_eq(jiffies, elr->lr_next_sched)) {
+ if (ext4_run_li_request(elr) != 0) {
+ /* error, remove the lazy_init job */
+ ext4_remove_li_request(elr);
+ continue;
+ }
}
if (time_before(elr->lr_next_sched, next_wakeup))
@@ -2740,7 +2772,8 @@ cont_thread:
if (freezing(current))
refrigerator();
- if (time_after_eq(jiffies, next_wakeup)) {
+ if ((time_after_eq(jiffies, next_wakeup)) ||
+ (MAX_JIFFY_OFFSET == next_wakeup)) {
cond_resched();
continue;
}
@@ -2788,9 +2821,6 @@ static void ext4_clear_request_list(void)
struct ext4_li_request *elr;
mutex_lock(&ext4_li_info->li_list_mtx);
- if (list_empty(&ext4_li_info->li_request_list))
- return;
-
list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
elr = list_entry(pos, struct ext4_li_request,
lr_request);
@@ -2899,7 +2929,7 @@ static int ext4_register_li_request(struct super_block *sb,
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_li_request *elr;
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
- int ret;
+ int ret = 0;
if (sbi->s_li_request != NULL)
return 0;
@@ -3054,41 +3084,41 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
/* Set defaults before we parse the mount options */
def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
- set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+ set_opt(sb, INIT_INODE_TABLE);
if (def_mount_opts & EXT4_DEFM_DEBUG)
- set_opt(sbi->s_mount_opt, DEBUG);
+ set_opt(sb, DEBUG);
if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups",
"2.6.38");
- set_opt(sbi->s_mount_opt, GRPID);
+ set_opt(sb, GRPID);
}
if (def_mount_opts & EXT4_DEFM_UID16)
- set_opt(sbi->s_mount_opt, NO_UID32);
+ set_opt(sb, NO_UID32);
#ifdef CONFIG_EXT4_FS_XATTR
if (def_mount_opts & EXT4_DEFM_XATTR_USER)
- set_opt(sbi->s_mount_opt, XATTR_USER);
+ set_opt(sb, XATTR_USER);
#endif
#ifdef CONFIG_EXT4_FS_POSIX_ACL
if (def_mount_opts & EXT4_DEFM_ACL)
- set_opt(sbi->s_mount_opt, POSIX_ACL);
+ set_opt(sb, POSIX_ACL);
#endif
if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
- set_opt(sbi->s_mount_opt, JOURNAL_DATA);
+ set_opt(sb, JOURNAL_DATA);
else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
- set_opt(sbi->s_mount_opt, ORDERED_DATA);
+ set_opt(sb, ORDERED_DATA);
else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
- set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
+ set_opt(sb, WRITEBACK_DATA);
if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
- set_opt(sbi->s_mount_opt, ERRORS_PANIC);
+ set_opt(sb, ERRORS_PANIC);
else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
- set_opt(sbi->s_mount_opt, ERRORS_CONT);
+ set_opt(sb, ERRORS_CONT);
else
- set_opt(sbi->s_mount_opt, ERRORS_RO);
+ set_opt(sb, ERRORS_RO);
if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
- set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+ set_opt(sb, BLOCK_VALIDITY);
if (def_mount_opts & EXT4_DEFM_DISCARD)
- set_opt(sbi->s_mount_opt, DISCARD);
+ set_opt(sb, DISCARD);
sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
@@ -3097,7 +3127,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
- set_opt(sbi->s_mount_opt, BARRIER);
+ set_opt(sb, BARRIER);
/*
* enable delayed allocation by default
@@ -3105,7 +3135,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
*/
if (!IS_EXT3_SB(sb) &&
((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
- set_opt(sbi->s_mount_opt, DELALLOC);
+ set_opt(sb, DELALLOC);
if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
&journal_devnum, &journal_ioprio, NULL, 0)) {
@@ -3257,13 +3287,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* Test whether we have more sectors than will fit in sector_t,
* and whether the max offset is addressable by the page cache.
*/
- ret = generic_check_addressable(sb->s_blocksize_bits,
+ err = generic_check_addressable(sb->s_blocksize_bits,
ext4_blocks_count(es));
- if (ret) {
+ if (err) {
ext4_msg(sb, KERN_ERR, "filesystem"
" too large to mount safely on this system");
if (sizeof(sector_t) < 8)
ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
+ ret = err;
goto failed_mount;
}
@@ -3348,6 +3379,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
get_random_bytes(&sbi->s_next_generation, sizeof(u32));
spin_lock_init(&sbi->s_next_gen_lock);
+ err = percpu_counter_init(&sbi->s_freeblocks_counter,
+ ext4_count_free_blocks(sb));
+ if (!err) {
+ err = percpu_counter_init(&sbi->s_freeinodes_counter,
+ ext4_count_free_inodes(sb));
+ }
+ if (!err) {
+ err = percpu_counter_init(&sbi->s_dirs_counter,
+ ext4_count_dirs(sb));
+ }
+ if (!err) {
+ err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
+ }
+ if (err) {
+ ext4_msg(sb, KERN_ERR, "insufficient memory");
+ goto failed_mount3;
+ }
+
sbi->s_stripe = ext4_get_stripe_size(sbi);
sbi->s_max_writeback_mb_bump = 128;
@@ -3389,8 +3438,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"suppressed and not mounted read-only");
goto failed_mount_wq;
} else {
- clear_opt(sbi->s_mount_opt, DATA_FLAGS);
- set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
+ clear_opt(sb, DATA_FLAGS);
+ set_opt(sb, WRITEBACK_DATA);
sbi->s_journal = NULL;
needs_recovery = 0;
goto no_journal;
@@ -3428,9 +3477,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
*/
if (jbd2_journal_check_available_features
(sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
- set_opt(sbi->s_mount_opt, ORDERED_DATA);
+ set_opt(sb, ORDERED_DATA);
else
- set_opt(sbi->s_mount_opt, JOURNAL_DATA);
+ set_opt(sb, JOURNAL_DATA);
break;
case EXT4_MOUNT_ORDERED_DATA:
@@ -3446,22 +3495,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
-no_journal:
- err = percpu_counter_init(&sbi->s_freeblocks_counter,
- ext4_count_free_blocks(sb));
- if (!err)
- err = percpu_counter_init(&sbi->s_freeinodes_counter,
- ext4_count_free_inodes(sb));
- if (!err)
- err = percpu_counter_init(&sbi->s_dirs_counter,
- ext4_count_dirs(sb));
- if (!err)
- err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
- if (err) {
- ext4_msg(sb, KERN_ERR, "insufficient memory");
- goto failed_mount_wq;
- }
+ /*
+ * The journal may have updated the bg summary counts, so we
+ * need to update the global counters.
+ */
+ percpu_counter_set(&sbi->s_freeblocks_counter,
+ ext4_count_free_blocks(sb));
+ percpu_counter_set(&sbi->s_freeinodes_counter,
+ ext4_count_free_inodes(sb));
+ percpu_counter_set(&sbi->s_dirs_counter,
+ ext4_count_dirs(sb));
+ percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
+no_journal:
EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
if (!EXT4_SB(sb)->dio_unwritten_wq) {
printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
@@ -3523,18 +3569,18 @@ no_journal:
(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
"requested data journaling mode");
- clear_opt(sbi->s_mount_opt, DELALLOC);
+ clear_opt(sb, DELALLOC);
}
if (test_opt(sb, DIOREAD_NOLOCK)) {
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
"option - requested data journaling mode");
- clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+ clear_opt(sb, DIOREAD_NOLOCK);
}
if (sb->s_blocksize < PAGE_SIZE) {
ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
"option - block size is too small");
- clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+ clear_opt(sb, DIOREAD_NOLOCK);
}
}
@@ -3611,10 +3657,6 @@ failed_mount_wq:
jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
}
- percpu_counter_destroy(&sbi->s_freeblocks_counter);
- percpu_counter_destroy(&sbi->s_freeinodes_counter);
- percpu_counter_destroy(&sbi->s_dirs_counter);
- percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
failed_mount3:
if (sbi->s_flex_groups) {
if (is_vmalloc_addr(sbi->s_flex_groups))
@@ -3622,6 +3664,10 @@ failed_mount3:
else
kfree(sbi->s_flex_groups);
}
+ percpu_counter_destroy(&sbi->s_freeblocks_counter);
+ percpu_counter_destroy(&sbi->s_freeinodes_counter);
+ percpu_counter_destroy(&sbi->s_dirs_counter);
+ percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
failed_mount2:
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
@@ -3949,13 +3995,11 @@ static int ext4_commit_super(struct super_block *sb, int sync)
else
es->s_kbytes_written =
cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
- if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter))
- ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
- &EXT4_SB(sb)->s_freeblocks_counter));
- if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
- es->s_free_inodes_count =
- cpu_to_le32(percpu_counter_sum_positive(
- &EXT4_SB(sb)->s_freeinodes_counter));
+ ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
+ &EXT4_SB(sb)->s_freeblocks_counter));
+ es->s_free_inodes_count =
+ cpu_to_le32(percpu_counter_sum_positive(
+ &EXT4_SB(sb)->s_freeinodes_counter));
sb->s_dirt = 0;
BUFFER_TRACE(sbh, "marking dirty");
mark_buffer_dirty(sbh);
@@ -4135,6 +4179,22 @@ static int ext4_unfreeze(struct super_block *sb)
return 0;
}
+/*
+ * Structure to save mount options for ext4_remount's benefit
+ */
+struct ext4_mount_options {
+ unsigned long s_mount_opt;
+ unsigned long s_mount_opt2;
+ uid_t s_resuid;
+ gid_t s_resgid;
+ unsigned long s_commit_interval;
+ u32 s_min_batch_time, s_max_batch_time;
+#ifdef CONFIG_QUOTA
+ int s_jquota_fmt;
+ char *s_qf_names[MAXQUOTAS];
+#endif
+};
+
static int ext4_remount(struct super_block *sb, int *flags, char *data)
{
struct ext4_super_block *es;
@@ -4155,6 +4215,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
lock_super(sb);
old_sb_flags = sb->s_flags;
old_opts.s_mount_opt = sbi->s_mount_opt;
+ old_opts.s_mount_opt2 = sbi->s_mount_opt2;
old_opts.s_resuid = sbi->s_resuid;
old_opts.s_resgid = sbi->s_resgid;
old_opts.s_commit_interval = sbi->s_commit_interval;
@@ -4308,6 +4369,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
restore_opts:
sb->s_flags = old_sb_flags;
sbi->s_mount_opt = old_opts.s_mount_opt;
+ sbi->s_mount_opt2 = old_opts.s_mount_opt2;
sbi->s_resuid = old_opts.s_resuid;
sbi->s_resgid = old_opts.s_resgid;
sbi->s_commit_interval = old_opts.s_commit_interval;
@@ -4556,12 +4618,10 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
static int ext4_quota_off(struct super_block *sb, int type)
{
- /* Force all delayed allocation blocks to be allocated */
- if (test_opt(sb, DELALLOC)) {
- down_read(&sb->s_umount);
+ /* Force all delayed allocation blocks to be allocated.
+ * Caller already holds s_umount sem */
+ if (test_opt(sb, DELALLOC))
sync_filesystem(sb);
- up_read(&sb->s_umount);
- }
return dquot_quota_off(sb, type);
}
@@ -4667,17 +4727,17 @@ out:
#endif
-static int ext4_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
}
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext2_fs_type = {
.owner = THIS_MODULE,
.name = "ext2",
- .get_sb = ext4_get_sb,
+ .mount = ext4_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
@@ -4722,7 +4782,7 @@ static inline void unregister_as_ext3(void) { }
static struct file_system_type ext4_fs_type = {
.owner = THIS_MODULE,
.name = "ext4",
- .get_sb = ext4_get_sb,
+ .mount = ext4_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index fa4b899..fc32176 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -427,23 +427,23 @@ cleanup:
static int
ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
- int i_error, b_error;
+ int ret, ret2;
down_read(&EXT4_I(dentry->d_inode)->xattr_sem);
- i_error = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
- if (i_error < 0) {
- b_error = 0;
- } else {
- if (buffer) {
- buffer += i_error;
- buffer_size -= i_error;
- }
- b_error = ext4_xattr_block_list(dentry, buffer, buffer_size);
- if (b_error < 0)
- i_error = 0;
+ ret = ret2 = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
+ if (ret < 0)
+ goto errout;
+ if (buffer) {
+ buffer += ret;
+ buffer_size -= ret;
}
+ ret = ext4_xattr_block_list(dentry, buffer, buffer_size);
+ if (ret < 0)
+ goto errout;
+ ret += ret2;
+errout:
up_read(&EXT4_I(dentry->d_inode)->xattr_sem);
- return i_error + b_error;
+ return ret;
}
/*
@@ -947,7 +947,7 @@ ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
/*
* ext4_xattr_set_handle()
*
- * Create, replace or remove an extended attribute for this inode. Buffer
+ * Create, replace or remove an extended attribute for this inode. Value
* is NULL to remove an existing extended attribute, and non-NULL to
* either replace an existing extended attribute, or create a new extended
* attribute. The flags XATTR_REPLACE and XATTR_CREATE
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 281dd83..1ef1652 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -122,7 +122,7 @@ ext4_xattr_put_super(struct super_block *sb)
}
static __init inline int
-init_ext4_xattr(void)
+ext4_init_xattr(void)
{
return 0;
}
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index ad6998a..206351a 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -514,11 +514,18 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void fat_destroy_inode(struct inode *inode)
+static void fat_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
}
+static void fat_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, fat_i_callback);
+}
+
static void init_once(void *foo)
{
struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
@@ -743,7 +750,7 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb,
*/
result = d_obtain_alias(inode);
if (!IS_ERR(result))
- result->d_op = sb->s_root->d_op;
+ d_set_d_op(result, sb->s_root->d_op);
return result;
}
@@ -793,7 +800,7 @@ static struct dentry *fat_get_parent(struct dentry *child)
parent = d_obtain_alias(inode);
if (!IS_ERR(parent))
- parent->d_op = sb->s_root->d_op;
+ d_set_d_op(parent, sb->s_root->d_op);
out:
unlock_super(sb);
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index bbca5c1..35ffe43 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -148,7 +148,8 @@ static int msdos_find(struct inode *dir, const unsigned char *name, int len,
* that the existing dentry can be used. The msdos fs routines will
* return ENOENT or EINVAL as appropriate.
*/
-static int msdos_hash(struct dentry *dentry, struct qstr *qstr)
+static int msdos_hash(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options;
unsigned char msdos_name[MSDOS_NAME];
@@ -164,16 +165,18 @@ static int msdos_hash(struct dentry *dentry, struct qstr *qstr)
* Compare two msdos names. If either of the names are invalid,
* we fall back to doing the standard name comparison.
*/
-static int msdos_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b)
+static int msdos_cmp(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options;
+ struct fat_mount_options *options = &MSDOS_SB(parent->d_sb)->options;
unsigned char a_msdos_name[MSDOS_NAME], b_msdos_name[MSDOS_NAME];
int error;
- error = msdos_format_name(a->name, a->len, a_msdos_name, options);
+ error = msdos_format_name(name->name, name->len, a_msdos_name, options);
if (error)
goto old_compare;
- error = msdos_format_name(b->name, b->len, b_msdos_name, options);
+ error = msdos_format_name(str, len, b_msdos_name, options);
if (error)
goto old_compare;
error = memcmp(a_msdos_name, b_msdos_name, MSDOS_NAME);
@@ -182,8 +185,8 @@ out:
old_compare:
error = 1;
- if (a->len == b->len)
- error = memcmp(a->name, b->name, a->len);
+ if (name->len == len)
+ error = memcmp(name->name, str, len);
goto out;
}
@@ -224,10 +227,10 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry,
}
out:
unlock_super(sb);
- dentry->d_op = &msdos_dentry_operations;
+ d_set_d_op(dentry, &msdos_dentry_operations);
dentry = d_splice_alias(inode, dentry);
if (dentry)
- dentry->d_op = &msdos_dentry_operations;
+ d_set_d_op(dentry, &msdos_dentry_operations);
return dentry;
error:
@@ -670,23 +673,22 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent)
}
sb->s_flags |= MS_NOATIME;
- sb->s_root->d_op = &msdos_dentry_operations;
+ d_set_d_op(sb->s_root, &msdos_dentry_operations);
unlock_super(sb);
return 0;
}
-static int msdos_get_sb(struct file_system_type *fs_type,
+static struct dentry *msdos_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, msdos_fill_super);
}
static struct file_system_type msdos_fs_type = {
.owner = THIS_MODULE,
.name = "msdos",
- .get_sb = msdos_get_sb,
+ .mount = msdos_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 6f0f6c9..e3ffc5e 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -43,6 +43,9 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
{
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
/* This is not negative dentry. Always valid. */
if (dentry->d_inode)
return 1;
@@ -51,6 +54,9 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
{
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
/*
* This is not negative dentry. Always valid.
*
@@ -85,22 +91,26 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
}
/* returns the length of a struct qstr, ignoring trailing dots */
-static unsigned int vfat_striptail_len(struct qstr *qstr)
+static unsigned int __vfat_striptail_len(unsigned int len, const char *name)
{
- unsigned int len = qstr->len;
-
- while (len && qstr->name[len - 1] == '.')
+ while (len && name[len - 1] == '.')
len--;
return len;
}
+static unsigned int vfat_striptail_len(const struct qstr *qstr)
+{
+ return __vfat_striptail_len(qstr->len, qstr->name);
+}
+
/*
* Compute the hash for the vfat name corresponding to the dentry.
* Note: if the name is invalid, we leave the hash code unchanged so
* that the existing dentry can be used. The vfat fs routines will
* return ENOENT or EINVAL as appropriate.
*/
-static int vfat_hash(struct dentry *dentry, struct qstr *qstr)
+static int vfat_hash(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr));
return 0;
@@ -112,9 +122,10 @@ static int vfat_hash(struct dentry *dentry, struct qstr *qstr)
* that the existing dentry can be used. The vfat fs routines will
* return ENOENT or EINVAL as appropriate.
*/
-static int vfat_hashi(struct dentry *dentry, struct qstr *qstr)
+static int vfat_hashi(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
- struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io;
+ struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io;
const unsigned char *name;
unsigned int len;
unsigned long hash;
@@ -133,16 +144,18 @@ static int vfat_hashi(struct dentry *dentry, struct qstr *qstr)
/*
* Case insensitive compare of two vfat names.
*/
-static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b)
+static int vfat_cmpi(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io;
+ struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io;
unsigned int alen, blen;
/* A filename cannot end in '.' or we treat it like it has none */
- alen = vfat_striptail_len(a);
- blen = vfat_striptail_len(b);
+ alen = vfat_striptail_len(name);
+ blen = __vfat_striptail_len(len, str);
if (alen == blen) {
- if (nls_strnicmp(t, a->name, b->name, alen) == 0)
+ if (nls_strnicmp(t, name->name, str, alen) == 0)
return 0;
}
return 1;
@@ -151,15 +164,17 @@ static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b)
/*
* Case sensitive compare of two vfat names.
*/
-static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b)
+static int vfat_cmp(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
unsigned int alen, blen;
/* A filename cannot end in '.' or we treat it like it has none */
- alen = vfat_striptail_len(a);
- blen = vfat_striptail_len(b);
+ alen = vfat_striptail_len(name);
+ blen = __vfat_striptail_len(len, str);
if (alen == blen) {
- if (strncmp(a->name, b->name, alen) == 0)
+ if (strncmp(name->name, str, alen) == 0)
return 0;
}
return 1;
@@ -757,11 +772,11 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
out:
unlock_super(sb);
- dentry->d_op = sb->s_root->d_op;
+ d_set_d_op(dentry, sb->s_root->d_op);
dentry->d_time = dentry->d_parent->d_inode->i_version;
dentry = d_splice_alias(inode, dentry);
if (dentry) {
- dentry->d_op = sb->s_root->d_op;
+ d_set_d_op(dentry, sb->s_root->d_op);
dentry->d_time = dentry->d_parent->d_inode->i_version;
}
return dentry;
@@ -1063,26 +1078,25 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent)
}
if (MSDOS_SB(sb)->options.name_check != 's')
- sb->s_root->d_op = &vfat_ci_dentry_ops;
+ d_set_d_op(sb->s_root, &vfat_ci_dentry_ops);
else
- sb->s_root->d_op = &vfat_dentry_ops;
+ d_set_d_op(sb->s_root, &vfat_dentry_ops);
unlock_super(sb);
return 0;
}
-static int vfat_get_sb(struct file_system_type *fs_type,
+static struct dentry *vfat_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, vfat_fill_super);
}
static struct file_system_type vfat_fs_type = {
.owner = THIS_MODULE,
.name = "vfat",
- .get_sb = vfat_get_sb,
+ .mount = vfat_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 68ba492..751d6b2 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -115,6 +115,9 @@ int unregister_filesystem(struct file_system_type * fs)
tmp = &(*tmp)->next;
}
write_unlock(&file_systems_lock);
+
+ synchronize_rcu();
+
return -EINVAL;
}
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 8c04eac..2ba6719 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -337,6 +337,13 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
return ip;
}
+static void vxfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(vxfs_inode_cachep, inode->i_private);
+}
+
/**
* vxfs_evict_inode - remove inode from main memory
* @ip: inode to discard.
@@ -350,5 +357,5 @@ vxfs_evict_inode(struct inode *ip)
{
truncate_inode_pages(&ip->i_data, 0);
end_writeback(ip);
- kmem_cache_free(vxfs_inode_cachep, ip->i_private);
+ call_rcu(&ip->i_rcu, vxfs_i_callback);
}
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 71b0148..9d1c995 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -246,17 +246,16 @@ out:
/*
* The usual module blurb.
*/
-static int vxfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *vxfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, vxfs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, vxfs_fill_super);
}
static struct file_system_type vxfs_fs_type = {
.owner = THIS_MODULE,
.name = "vxfs",
- .get_sb = vxfs_get_sb,
+ .mount = vxfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index aed881a..3d06ccc 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -707,6 +707,17 @@ get_next_work_item(struct backing_dev_info *bdi)
return work;
}
+/*
+ * Add in the number of potentially dirty inodes, because each inode
+ * write can dirty pagecache in the underlying blockdev.
+ */
+static unsigned long get_nr_dirty_pages(void)
+{
+ return global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS) +
+ get_nr_dirty_inodes();
+}
+
static long wb_check_old_data_flush(struct bdi_writeback *wb)
{
unsigned long expired;
@@ -724,13 +735,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
return 0;
wb->last_old_flush = jiffies;
- /*
- * Add in the number of potentially dirty inodes, because each inode
- * write can dirty pagecache in the underlying blockdev.
- */
- nr_pages = global_page_state(NR_FILE_DIRTY) +
- global_page_state(NR_UNSTABLE_NFS) +
- get_nr_dirty_inodes();
+ nr_pages = get_nr_dirty_pages();
if (nr_pages) {
struct wb_writeback_work work = {
@@ -1076,32 +1081,42 @@ static void wait_sb_inodes(struct super_block *sb)
}
/**
- * writeback_inodes_sb - writeback dirty inodes from given super_block
+ * writeback_inodes_sb_nr - writeback dirty inodes from given super_block
* @sb: the superblock
+ * @nr: the number of pages to write
*
* Start writeback on some inodes on this super_block. No guarantees are made
* on how many (if any) will be written, and this function does not wait
- * for IO completion of submitted IO. The number of pages submitted is
- * returned.
+ * for IO completion of submitted IO.
*/
-void writeback_inodes_sb(struct super_block *sb)
+void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
{
- unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
- unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
DECLARE_COMPLETION_ONSTACK(done);
struct wb_writeback_work work = {
.sb = sb,
.sync_mode = WB_SYNC_NONE,
.done = &done,
+ .nr_pages = nr,
};
WARN_ON(!rwsem_is_locked(&sb->s_umount));
-
- work.nr_pages = nr_dirty + nr_unstable + get_nr_dirty_inodes();
-
bdi_queue_work(sb->s_bdi, &work);
wait_for_completion(&done);
}
+EXPORT_SYMBOL(writeback_inodes_sb_nr);
+
+/**
+ * writeback_inodes_sb - writeback dirty inodes from given super_block
+ * @sb: the superblock
+ *
+ * Start writeback on some inodes on this super_block. No guarantees are made
+ * on how many (if any) will be written, and this function does not wait
+ * for IO completion of submitted IO.
+ */
+void writeback_inodes_sb(struct super_block *sb)
+{
+ return writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
+}
EXPORT_SYMBOL(writeback_inodes_sb);
/**
@@ -1124,6 +1139,27 @@ int writeback_inodes_sb_if_idle(struct super_block *sb)
EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
/**
+ * writeback_inodes_sb_if_idle - start writeback if none underway
+ * @sb: the superblock
+ * @nr: the number of pages to write
+ *
+ * Invoke writeback_inodes_sb if no writeback is currently underway.
+ * Returns 1 if writeback was started, 0 if not.
+ */
+int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
+ unsigned long nr)
+{
+ if (!writeback_in_progress(sb->s_bdi)) {
+ down_read(&sb->s_umount);
+ writeback_inodes_sb_nr(sb, nr);
+ up_read(&sb->s_umount);
+ return 1;
+ } else
+ return 0;
+}
+EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
+
+/**
* sync_inodes_sb - sync sb inode pages
* @sb: the superblock
*
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index ed45a9c..68ca487 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -14,12 +14,14 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
struct path old_root;
spin_lock(&fs->lock);
+ write_seqcount_begin(&fs->seq);
old_root = fs->root;
fs->root = *path;
- path_get(path);
+ path_get_long(path);
+ write_seqcount_end(&fs->seq);
spin_unlock(&fs->lock);
if (old_root.dentry)
- path_put(&old_root);
+ path_put_long(&old_root);
}
/*
@@ -31,13 +33,15 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
struct path old_pwd;
spin_lock(&fs->lock);
+ write_seqcount_begin(&fs->seq);
old_pwd = fs->pwd;
fs->pwd = *path;
- path_get(path);
+ path_get_long(path);
+ write_seqcount_end(&fs->seq);
spin_unlock(&fs->lock);
if (old_pwd.dentry)
- path_put(&old_pwd);
+ path_put_long(&old_pwd);
}
void chroot_fs_refs(struct path *old_root, struct path *new_root)
@@ -52,31 +56,33 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
fs = p->fs;
if (fs) {
spin_lock(&fs->lock);
+ write_seqcount_begin(&fs->seq);
if (fs->root.dentry == old_root->dentry
&& fs->root.mnt == old_root->mnt) {
- path_get(new_root);
+ path_get_long(new_root);
fs->root = *new_root;
count++;
}
if (fs->pwd.dentry == old_root->dentry
&& fs->pwd.mnt == old_root->mnt) {
- path_get(new_root);
+ path_get_long(new_root);
fs->pwd = *new_root;
count++;
}
+ write_seqcount_end(&fs->seq);
spin_unlock(&fs->lock);
}
task_unlock(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
while (count--)
- path_put(old_root);
+ path_put_long(old_root);
}
void free_fs_struct(struct fs_struct *fs)
{
- path_put(&fs->root);
- path_put(&fs->pwd);
+ path_put_long(&fs->root);
+ path_put_long(&fs->pwd);
kmem_cache_free(fs_cachep, fs);
}
@@ -88,8 +94,10 @@ void exit_fs(struct task_struct *tsk)
int kill;
task_lock(tsk);
spin_lock(&fs->lock);
+ write_seqcount_begin(&fs->seq);
tsk->fs = NULL;
kill = !--fs->users;
+ write_seqcount_end(&fs->seq);
spin_unlock(&fs->lock);
task_unlock(tsk);
if (kill)
@@ -105,8 +113,15 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
fs->users = 1;
fs->in_exec = 0;
spin_lock_init(&fs->lock);
+ seqcount_init(&fs->seq);
fs->umask = old->umask;
- get_fs_root_and_pwd(old, &fs->root, &fs->pwd);
+
+ spin_lock(&old->lock);
+ fs->root = old->root;
+ path_get_long(&fs->root);
+ fs->pwd = old->pwd;
+ path_get_long(&fs->pwd);
+ spin_unlock(&old->lock);
}
return fs;
}
@@ -144,6 +159,7 @@ EXPORT_SYMBOL(current_umask);
struct fs_struct init_fs = {
.users = 1,
.lock = __SPIN_LOCK_UNLOCKED(init_fs.lock),
+ .seq = SEQCNT_ZERO,
.umask = 0022,
};
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 4eba076..85542a7 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -322,12 +322,10 @@ static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent)
return 0;
}
-static int fuse_ctl_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *raw_data,
- struct vfsmount *mnt)
+static struct dentry *fuse_ctl_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data)
{
- return get_sb_single(fs_type, flags, raw_data,
- fuse_ctl_fill_super, mnt);
+ return mount_single(fs_type, flags, raw_data, fuse_ctl_fill_super);
}
static void fuse_ctl_kill_sb(struct super_block *sb)
@@ -346,7 +344,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb)
static struct file_system_type fuse_ctl_fs_type = {
.owner = THIS_MODULE,
.name = "fusectl",
- .get_sb = fuse_ctl_get_sb,
+ .mount = fuse_ctl_mount,
.kill_sb = fuse_ctl_kill_sb,
};
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 6e07696..cf8d28d 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -251,6 +251,20 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}
+void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
+ u64 nodeid, u64 nlookup)
+{
+ forget->forget_one.nodeid = nodeid;
+ forget->forget_one.nlookup = nlookup;
+
+ spin_lock(&fc->lock);
+ fc->forget_list_tail->next = forget;
+ fc->forget_list_tail = forget;
+ wake_up(&fc->waitq);
+ kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+ spin_unlock(&fc->lock);
+}
+
static void flush_bg_queue(struct fuse_conn *fc)
{
while (fc->active_background < fc->max_background &&
@@ -438,12 +452,6 @@ static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
}
}
-void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
-{
- req->isreply = 0;
- fuse_request_send_nowait(fc, req);
-}
-
void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
{
req->isreply = 1;
@@ -896,9 +904,15 @@ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
return err;
}
+static int forget_pending(struct fuse_conn *fc)
+{
+ return fc->forget_list_head.next != NULL;
+}
+
static int request_pending(struct fuse_conn *fc)
{
- return !list_empty(&fc->pending) || !list_empty(&fc->interrupts);
+ return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) ||
+ forget_pending(fc);
}
/* Wait until a request is available on the pending list */
@@ -960,6 +974,120 @@ __releases(fc->lock)
return err ? err : reqsize;
}
+static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc,
+ unsigned max,
+ unsigned *countp)
+{
+ struct fuse_forget_link *head = fc->forget_list_head.next;
+ struct fuse_forget_link **newhead = &head;
+ unsigned count;
+
+ for (count = 0; *newhead != NULL && count < max; count++)
+ newhead = &(*newhead)->next;
+
+ fc->forget_list_head.next = *newhead;
+ *newhead = NULL;
+ if (fc->forget_list_head.next == NULL)
+ fc->forget_list_tail = &fc->forget_list_head;
+
+ if (countp != NULL)
+ *countp = count;
+
+ return head;
+}
+
+static int fuse_read_single_forget(struct fuse_conn *fc,
+ struct fuse_copy_state *cs,
+ size_t nbytes)
+__releases(fc->lock)
+{
+ int err;
+ struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL);
+ struct fuse_forget_in arg = {
+ .nlookup = forget->forget_one.nlookup,
+ };
+ struct fuse_in_header ih = {
+ .opcode = FUSE_FORGET,
+ .nodeid = forget->forget_one.nodeid,
+ .unique = fuse_get_unique(fc),
+ .len = sizeof(ih) + sizeof(arg),
+ };
+
+ spin_unlock(&fc->lock);
+ kfree(forget);
+ if (nbytes < ih.len)
+ return -EINVAL;
+
+ err = fuse_copy_one(cs, &ih, sizeof(ih));
+ if (!err)
+ err = fuse_copy_one(cs, &arg, sizeof(arg));
+ fuse_copy_finish(cs);
+
+ if (err)
+ return err;
+
+ return ih.len;
+}
+
+static int fuse_read_batch_forget(struct fuse_conn *fc,
+ struct fuse_copy_state *cs, size_t nbytes)
+__releases(fc->lock)
+{
+ int err;
+ unsigned max_forgets;
+ unsigned count;
+ struct fuse_forget_link *head;
+ struct fuse_batch_forget_in arg = { .count = 0 };
+ struct fuse_in_header ih = {
+ .opcode = FUSE_BATCH_FORGET,
+ .unique = fuse_get_unique(fc),
+ .len = sizeof(ih) + sizeof(arg),
+ };
+
+ if (nbytes < ih.len) {
+ spin_unlock(&fc->lock);
+ return -EINVAL;
+ }
+
+ max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
+ head = dequeue_forget(fc, max_forgets, &count);
+ spin_unlock(&fc->lock);
+
+ arg.count = count;
+ ih.len += count * sizeof(struct fuse_forget_one);
+ err = fuse_copy_one(cs, &ih, sizeof(ih));
+ if (!err)
+ err = fuse_copy_one(cs, &arg, sizeof(arg));
+
+ while (head) {
+ struct fuse_forget_link *forget = head;
+
+ if (!err) {
+ err = fuse_copy_one(cs, &forget->forget_one,
+ sizeof(forget->forget_one));
+ }
+ head = forget->next;
+ kfree(forget);
+ }
+
+ fuse_copy_finish(cs);
+
+ if (err)
+ return err;
+
+ return ih.len;
+}
+
+static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs,
+ size_t nbytes)
+__releases(fc->lock)
+{
+ if (fc->minor < 16 || fc->forget_list_head.next->next == NULL)
+ return fuse_read_single_forget(fc, cs, nbytes);
+ else
+ return fuse_read_batch_forget(fc, cs, nbytes);
+}
+
/*
* Read a single request into the userspace filesystem's buffer. This
* function waits until a request is available, then removes it from
@@ -998,6 +1126,14 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
return fuse_read_interrupt(fc, cs, nbytes, req);
}
+ if (forget_pending(fc)) {
+ if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
+ return fuse_read_forget(fc, cs, nbytes);
+
+ if (fc->forget_batch <= -8)
+ fc->forget_batch = 16;
+ }
+
req = list_entry(fc->pending.next, struct fuse_req, list);
req->state = FUSE_REQ_READING;
list_move(&req->list, &fc->io);
@@ -1090,7 +1226,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
if (!fc)
return -EPERM;
- bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL);
+ bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
if (!bufs)
return -ENOMEM;
@@ -1626,7 +1762,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
if (!fc)
return -EPERM;
- bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL);
+ bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
if (!bufs)
return -ENOMEM;
@@ -1770,6 +1906,8 @@ __acquires(fc->lock)
flush_bg_queue(fc);
end_requests(fc, &fc->pending);
end_requests(fc, &fc->processing);
+ while (forget_pending(fc))
+ kfree(dequeue_forget(fc, 1, NULL));
}
/*
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c9627c9..042af73 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -10,9 +10,9 @@
#include <linux/pagemap.h>
#include <linux/file.h>
-#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/namei.h>
+#include <linux/slab.h>
#if BITS_PER_LONG >= 64
static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
@@ -156,8 +156,12 @@ u64 fuse_get_attr_version(struct fuse_conn *fc)
*/
static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
{
- struct inode *inode = entry->d_inode;
+ struct inode *inode;
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ inode = entry->d_inode;
if (inode && is_bad_inode(inode))
return 0;
else if (fuse_dentry_time(entry) < get_jiffies_64()) {
@@ -165,7 +169,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
struct fuse_entry_out outarg;
struct fuse_conn *fc;
struct fuse_req *req;
- struct fuse_req *forget_req;
+ struct fuse_forget_link *forget;
struct dentry *parent;
u64 attr_version;
@@ -178,8 +182,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
if (IS_ERR(req))
return 0;
- forget_req = fuse_get_req(fc);
- if (IS_ERR(forget_req)) {
+ forget = fuse_alloc_forget();
+ if (!forget) {
fuse_put_request(fc, req);
return 0;
}
@@ -199,15 +203,14 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
if (!err) {
struct fuse_inode *fi = get_fuse_inode(inode);
if (outarg.nodeid != get_node_id(inode)) {
- fuse_send_forget(fc, forget_req,
- outarg.nodeid, 1);
+ fuse_queue_forget(fc, forget, outarg.nodeid, 1);
return 0;
}
spin_lock(&fc->lock);
fi->nlookup++;
spin_unlock(&fc->lock);
}
- fuse_put_request(fc, forget_req);
+ kfree(forget);
if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
return 0;
@@ -259,7 +262,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
{
struct fuse_conn *fc = get_fuse_conn_super(sb);
struct fuse_req *req;
- struct fuse_req *forget_req;
+ struct fuse_forget_link *forget;
u64 attr_version;
int err;
@@ -273,9 +276,9 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
if (IS_ERR(req))
goto out;
- forget_req = fuse_get_req(fc);
- err = PTR_ERR(forget_req);
- if (IS_ERR(forget_req)) {
+ forget = fuse_alloc_forget();
+ err = -ENOMEM;
+ if (!forget) {
fuse_put_request(fc, req);
goto out;
}
@@ -301,13 +304,13 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
attr_version);
err = -ENOMEM;
if (!*inode) {
- fuse_send_forget(fc, forget_req, outarg->nodeid, 1);
+ fuse_queue_forget(fc, forget, outarg->nodeid, 1);
goto out;
}
err = 0;
out_put_forget:
- fuse_put_request(fc, forget_req);
+ kfree(forget);
out:
return err;
}
@@ -347,7 +350,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
}
entry = newent ? newent : entry;
- entry->d_op = &fuse_dentry_operations;
+ d_set_d_op(entry, &fuse_dentry_operations);
if (outarg_valid)
fuse_change_entry_timeout(entry, &outarg);
else
@@ -374,7 +377,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
struct inode *inode;
struct fuse_conn *fc = get_fuse_conn(dir);
struct fuse_req *req;
- struct fuse_req *forget_req;
+ struct fuse_forget_link *forget;
struct fuse_create_in inarg;
struct fuse_open_out outopen;
struct fuse_entry_out outentry;
@@ -388,9 +391,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
if (flags & O_DIRECT)
return -EINVAL;
- forget_req = fuse_get_req(fc);
- if (IS_ERR(forget_req))
- return PTR_ERR(forget_req);
+ forget = fuse_alloc_forget();
+ if (!forget)
+ return -ENOMEM;
req = fuse_get_req(fc);
err = PTR_ERR(req);
@@ -448,10 +451,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
if (!inode) {
flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
fuse_sync_release(ff, flags);
- fuse_send_forget(fc, forget_req, outentry.nodeid, 1);
+ fuse_queue_forget(fc, forget, outentry.nodeid, 1);
return -ENOMEM;
}
- fuse_put_request(fc, forget_req);
+ kfree(forget);
d_instantiate(entry, inode);
fuse_change_entry_timeout(entry, &outentry);
fuse_invalidate_attr(dir);
@@ -469,7 +472,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
out_put_request:
fuse_put_request(fc, req);
out_put_forget_req:
- fuse_put_request(fc, forget_req);
+ kfree(forget);
return err;
}
@@ -483,12 +486,12 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
struct fuse_entry_out outarg;
struct inode *inode;
int err;
- struct fuse_req *forget_req;
+ struct fuse_forget_link *forget;
- forget_req = fuse_get_req(fc);
- if (IS_ERR(forget_req)) {
+ forget = fuse_alloc_forget();
+ if (!forget) {
fuse_put_request(fc, req);
- return PTR_ERR(forget_req);
+ return -ENOMEM;
}
memset(&outarg, 0, sizeof(outarg));
@@ -515,10 +518,10 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
&outarg.attr, entry_attr_timeout(&outarg), 0);
if (!inode) {
- fuse_send_forget(fc, forget_req, outarg.nodeid, 1);
+ fuse_queue_forget(fc, forget, outarg.nodeid, 1);
return -ENOMEM;
}
- fuse_put_request(fc, forget_req);
+ kfree(forget);
if (S_ISDIR(inode->i_mode)) {
struct dentry *alias;
@@ -541,7 +544,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
return 0;
out_put_forget_req:
- fuse_put_request(fc, forget_req);
+ kfree(forget);
return err;
}
@@ -981,12 +984,15 @@ static int fuse_access(struct inode *inode, int mask)
* access request is sent. Execute permission is still checked
* locally based on file mode.
*/
-static int fuse_permission(struct inode *inode, int mask)
+static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
{
struct fuse_conn *fc = get_fuse_conn(inode);
bool refreshed = false;
int err = 0;
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
if (!fuse_allow_task(fc, current))
return -EACCES;
@@ -1001,7 +1007,7 @@ static int fuse_permission(struct inode *inode, int mask)
}
if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
- err = generic_permission(inode, mask, NULL);
+ err = generic_permission(inode, mask, flags, NULL);
/* If permission is denied, try to refresh file
attributes. This is also needed, because the root
@@ -1009,7 +1015,8 @@ static int fuse_permission(struct inode *inode, int mask)
if (err == -EACCES && !refreshed) {
err = fuse_do_getattr(inode, NULL, NULL);
if (!err)
- err = generic_permission(inode, mask, NULL);
+ err = generic_permission(inode, mask,
+ flags, NULL);
}
/* Note: the opposite of the above test does not
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c822458..95da1bc 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/module.h>
+#include <linux/compat.h>
static const struct file_operations fuse_direct_io_file_operations;
@@ -134,6 +135,7 @@ EXPORT_SYMBOL_GPL(fuse_do_open);
void fuse_finish_open(struct inode *inode, struct file *file)
{
struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = get_fuse_conn(inode);
if (ff->open_flags & FOPEN_DIRECT_IO)
file->f_op = &fuse_direct_io_file_operations;
@@ -141,6 +143,15 @@ void fuse_finish_open(struct inode *inode, struct file *file)
invalidate_inode_pages2(inode->i_mapping);
if (ff->open_flags & FOPEN_NONSEEKABLE)
nonseekable_open(inode, file);
+ if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fc->lock);
+ fi->attr_version = ++fc->attr_version;
+ i_size_write(inode, 0);
+ spin_unlock(&fc->lock);
+ fuse_invalidate_attr(inode);
+ }
}
int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
@@ -1618,6 +1629,94 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
}
/*
+ * CUSE servers compiled on 32bit broke on 64bit kernels because the
+ * ABI was defined to be 'struct iovec' which is different on 32bit
+ * and 64bit. Fortunately we can determine which structure the server
+ * used from the size of the reply.
+ */
+static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src,
+ size_t transferred, unsigned count,
+ bool is_compat)
+{
+#ifdef CONFIG_COMPAT
+ if (count * sizeof(struct compat_iovec) == transferred) {
+ struct compat_iovec *ciov = src;
+ unsigned i;
+
+ /*
+ * With this interface a 32bit server cannot support
+ * non-compat (i.e. ones coming from 64bit apps) ioctl
+ * requests
+ */
+ if (!is_compat)
+ return -EINVAL;
+
+ for (i = 0; i < count; i++) {
+ dst[i].iov_base = compat_ptr(ciov[i].iov_base);
+ dst[i].iov_len = ciov[i].iov_len;
+ }
+ return 0;
+ }
+#endif
+
+ if (count * sizeof(struct iovec) != transferred)
+ return -EIO;
+
+ memcpy(dst, src, transferred);
+ return 0;
+}
+
+/* Make sure iov_length() won't overflow */
+static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
+{
+ size_t n;
+ u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
+
+ for (n = 0; n < count; n++) {
+ if (iov->iov_len > (size_t) max)
+ return -ENOMEM;
+ max -= iov->iov_len;
+ }
+ return 0;
+}
+
+static int fuse_copy_ioctl_iovec(struct fuse_conn *fc, struct iovec *dst,
+ void *src, size_t transferred, unsigned count,
+ bool is_compat)
+{
+ unsigned i;
+ struct fuse_ioctl_iovec *fiov = src;
+
+ if (fc->minor < 16) {
+ return fuse_copy_ioctl_iovec_old(dst, src, transferred,
+ count, is_compat);
+ }
+
+ if (count * sizeof(struct fuse_ioctl_iovec) != transferred)
+ return -EIO;
+
+ for (i = 0; i < count; i++) {
+ /* Did the server supply an inappropriate value? */
+ if (fiov[i].base != (unsigned long) fiov[i].base ||
+ fiov[i].len != (unsigned long) fiov[i].len)
+ return -EIO;
+
+ dst[i].iov_base = (void __user *) (unsigned long) fiov[i].base;
+ dst[i].iov_len = (size_t) fiov[i].len;
+
+#ifdef CONFIG_COMPAT
+ if (is_compat &&
+ (ptr_to_compat(dst[i].iov_base) != fiov[i].base ||
+ (compat_size_t) dst[i].iov_len != fiov[i].len))
+ return -EIO;
+#endif
+ }
+
+ return 0;
+}
+
+
+/*
* For ioctls, there is no generic way to determine how much memory
* needs to be read and/or written. Furthermore, ioctls are allowed
* to dereference the passed pointer, so the parameter requires deep
@@ -1677,18 +1776,25 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
struct fuse_ioctl_out outarg;
struct fuse_req *req = NULL;
struct page **pages = NULL;
- struct page *iov_page = NULL;
+ struct iovec *iov_page = NULL;
struct iovec *in_iov = NULL, *out_iov = NULL;
unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages;
size_t in_size, out_size, transferred;
int err;
+#if BITS_PER_LONG == 32
+ inarg.flags |= FUSE_IOCTL_32BIT;
+#else
+ if (flags & FUSE_IOCTL_COMPAT)
+ inarg.flags |= FUSE_IOCTL_32BIT;
+#endif
+
/* assume all the iovs returned by client always fits in a page */
- BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
+ BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
err = -ENOMEM;
pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL);
- iov_page = alloc_page(GFP_KERNEL);
+ iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
if (!pages || !iov_page)
goto out;
@@ -1697,7 +1803,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
* RETRY from server is not allowed.
*/
if (!(flags & FUSE_IOCTL_UNRESTRICTED)) {
- struct iovec *iov = page_address(iov_page);
+ struct iovec *iov = iov_page;
iov->iov_base = (void __user *)arg;
iov->iov_len = _IOC_SIZE(cmd);
@@ -1778,7 +1884,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
/* did it ask for retry? */
if (outarg.flags & FUSE_IOCTL_RETRY) {
- char *vaddr;
+ void *vaddr;
/* no retry if in restricted mode */
err = -EIO;
@@ -1798,18 +1904,25 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
goto out;
- err = -EIO;
- if ((in_iovs + out_iovs) * sizeof(struct iovec) != transferred)
- goto out;
-
- /* okay, copy in iovs and retry */
vaddr = kmap_atomic(pages[0], KM_USER0);
- memcpy(page_address(iov_page), vaddr, transferred);
+ err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr,
+ transferred, in_iovs + out_iovs,
+ (flags & FUSE_IOCTL_COMPAT) != 0);
kunmap_atomic(vaddr, KM_USER0);
+ if (err)
+ goto out;
- in_iov = page_address(iov_page);
+ in_iov = iov_page;
out_iov = in_iov + in_iovs;
+ err = fuse_verify_ioctl_iov(in_iov, in_iovs);
+ if (err)
+ goto out;
+
+ err = fuse_verify_ioctl_iov(out_iov, out_iovs);
+ if (err)
+ goto out;
+
goto retry;
}
@@ -1821,8 +1934,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
out:
if (req)
fuse_put_request(fc, req);
- if (iov_page)
- __free_page(iov_page);
+ free_page((unsigned long) iov_page);
while (num_pages)
__free_page(pages[--num_pages]);
kfree(pages);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 57d4a3a..ae5744a 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -53,6 +53,12 @@ extern struct mutex fuse_mutex;
extern unsigned max_user_bgreq;
extern unsigned max_user_congthresh;
+/* One forget request */
+struct fuse_forget_link {
+ struct fuse_forget_one forget_one;
+ struct fuse_forget_link *next;
+};
+
/** FUSE inode */
struct fuse_inode {
/** Inode data */
@@ -66,7 +72,7 @@ struct fuse_inode {
u64 nlookup;
/** The request used for sending the FORGET message */
- struct fuse_req *forget_req;
+ struct fuse_forget_link *forget;
/** Time in jiffies until the file attributes are valid */
u64 i_time;
@@ -255,7 +261,6 @@ struct fuse_req {
/** Data for asynchronous requests */
union {
- struct fuse_forget_in forget_in;
struct {
struct fuse_release_in in;
struct path path;
@@ -369,6 +374,13 @@ struct fuse_conn {
/** Pending interrupts */
struct list_head interrupts;
+ /** Queue of pending forgets */
+ struct fuse_forget_link forget_list_head;
+ struct fuse_forget_link *forget_list_tail;
+
+ /** Batching of FORGET requests (positive indicates FORGET batch) */
+ int forget_batch;
+
/** Flag indicating if connection is blocked. This will be
the case before the INIT reply is received, and if there
are too many outstading backgrounds requests */
@@ -543,8 +555,10 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
/**
* Send FORGET command
*/
-void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
- u64 nodeid, u64 nlookup);
+void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
+ u64 nodeid, u64 nlookup);
+
+struct fuse_forget_link *fuse_alloc_forget(void);
/**
* Initialize READ or READDIR request
@@ -656,11 +670,6 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req);
/**
- * Send a request with no reply
- */
-void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
-
-/**
* Send a request in the background
*/
void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index da9e6e1..f62b32c 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -71,6 +71,11 @@ struct fuse_mount_data {
unsigned blksize;
};
+struct fuse_forget_link *fuse_alloc_forget()
+{
+ return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL);
+}
+
static struct inode *fuse_alloc_inode(struct super_block *sb)
{
struct inode *inode;
@@ -90,8 +95,8 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
INIT_LIST_HEAD(&fi->queued_writes);
INIT_LIST_HEAD(&fi->writepages);
init_waitqueue_head(&fi->page_waitq);
- fi->forget_req = fuse_request_alloc();
- if (!fi->forget_req) {
+ fi->forget = fuse_alloc_forget();
+ if (!fi->forget) {
kmem_cache_free(fuse_inode_cachep, inode);
return NULL;
}
@@ -99,27 +104,20 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
return inode;
}
-static void fuse_destroy_inode(struct inode *inode)
+static void fuse_i_callback(struct rcu_head *head)
{
- struct fuse_inode *fi = get_fuse_inode(inode);
- BUG_ON(!list_empty(&fi->write_files));
- BUG_ON(!list_empty(&fi->queued_writes));
- if (fi->forget_req)
- fuse_request_free(fi->forget_req);
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(fuse_inode_cachep, inode);
}
-void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
- u64 nodeid, u64 nlookup)
+static void fuse_destroy_inode(struct inode *inode)
{
- struct fuse_forget_in *inarg = &req->misc.forget_in;
- inarg->nlookup = nlookup;
- req->in.h.opcode = FUSE_FORGET;
- req->in.h.nodeid = nodeid;
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(struct fuse_forget_in);
- req->in.args[0].value = inarg;
- fuse_request_send_noreply(fc, req);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ BUG_ON(!list_empty(&fi->write_files));
+ BUG_ON(!list_empty(&fi->queued_writes));
+ kfree(fi->forget);
+ call_rcu(&inode->i_rcu, fuse_i_callback);
}
static void fuse_evict_inode(struct inode *inode)
@@ -129,8 +127,8 @@ static void fuse_evict_inode(struct inode *inode)
if (inode->i_sb->s_flags & MS_ACTIVE) {
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
- fuse_send_forget(fc, fi->forget_req, fi->nodeid, fi->nlookup);
- fi->forget_req = NULL;
+ fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
+ fi->forget = NULL;
}
}
@@ -534,6 +532,7 @@ void fuse_conn_init(struct fuse_conn *fc)
INIT_LIST_HEAD(&fc->interrupts);
INIT_LIST_HEAD(&fc->bg_queue);
INIT_LIST_HEAD(&fc->entry);
+ fc->forget_list_tail = &fc->forget_list_head;
atomic_set(&fc->num_waiting, 0);
fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
@@ -619,7 +618,7 @@ static struct dentry *fuse_get_dentry(struct super_block *sb,
entry = d_obtain_alias(inode);
if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) {
- entry->d_op = &fuse_dentry_operations;
+ d_set_d_op(entry, &fuse_dentry_operations);
fuse_invalidate_entry_cache(entry);
}
@@ -721,7 +720,7 @@ static struct dentry *fuse_get_parent(struct dentry *child)
parent = d_obtain_alias(inode);
if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) {
- parent->d_op = &fuse_dentry_operations;
+ d_set_d_op(parent, &fuse_dentry_operations);
fuse_invalidate_entry_cache(parent);
}
@@ -1041,11 +1040,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
return err;
}
-static int fuse_get_sb(struct file_system_type *fs_type,
+static struct dentry *fuse_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
- void *raw_data, struct vfsmount *mnt)
+ void *raw_data)
{
- return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
+ return mount_nodev(fs_type, flags, raw_data, fuse_fill_super);
}
static void fuse_kill_sb_anon(struct super_block *sb)
@@ -1065,17 +1064,16 @@ static struct file_system_type fuse_fs_type = {
.owner = THIS_MODULE,
.name = "fuse",
.fs_flags = FS_HAS_SUBTYPE,
- .get_sb = fuse_get_sb,
+ .mount = fuse_mount,
.kill_sb = fuse_kill_sb_anon,
};
#ifdef CONFIG_BLOCK
-static int fuse_get_sb_blk(struct file_system_type *fs_type,
+static struct dentry *fuse_mount_blk(struct file_system_type *fs_type,
int flags, const char *dev_name,
- void *raw_data, struct vfsmount *mnt)
+ void *raw_data)
{
- return get_sb_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super);
}
static void fuse_kill_sb_blk(struct super_block *sb)
@@ -1094,7 +1092,7 @@ static void fuse_kill_sb_blk(struct super_block *sb)
static struct file_system_type fuseblk_fs_type = {
.owner = THIS_MODULE,
.name = "fuseblk",
- .get_sb = fuse_get_sb_blk,
+ .mount = fuse_mount_blk,
.kill_sb = fuse_kill_sb_blk,
.fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
};
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 6bc9e3a..06c48a8 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -190,14 +190,20 @@ generic_acl_chmod(struct inode *inode)
}
int
-generic_check_acl(struct inode *inode, int mask)
+generic_check_acl(struct inode *inode, int mask, unsigned int flags)
{
- struct posix_acl *acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
-
- if (acl) {
- int error = posix_acl_permission(inode, acl, mask);
- posix_acl_release(acl);
- return error;
+ if (flags & IPERM_FLAG_RCU) {
+ if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+ return -ECHILD;
+ } else {
+ struct posix_acl *acl;
+
+ acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
+ if (acl) {
+ int error = posix_acl_permission(inode, acl, mask);
+ posix_acl_release(acl);
+ return error;
+ }
}
return -EAGAIN;
}
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 48171f4..7118f1a 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -75,11 +75,14 @@ static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type)
* Returns: errno
*/
-int gfs2_check_acl(struct inode *inode, int mask)
+int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
{
struct posix_acl *acl;
int error;
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index b522b0c..a93907c 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -16,7 +16,7 @@
#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default"
#define GFS2_ACL_MAX_ENTRIES 25
-extern int gfs2_check_acl(struct inode *inode, int mask);
+extern int gfs2_check_acl(struct inode *inode, int mask, unsigned int);
extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode);
extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
extern const struct xattr_handler gfs2_xattr_system_handler;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 5476c06..3c4039d 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -763,7 +763,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
int metadata;
unsigned int revokes = 0;
int x;
- int error;
+ int error = 0;
if (!*top)
sm->sm_first = 0;
@@ -780,7 +780,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
if (metadata)
revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
- error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
+ if (ip != GFS2_I(sdp->sd_rindex))
+ error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
+ else if (!sdp->sd_rgrps)
+ error = gfs2_ri_update(ip);
+
if (error)
return error;
@@ -879,7 +883,8 @@ out_rg_gunlock:
out_rlist:
gfs2_rlist_free(&rlist);
out:
- gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh);
+ if (ip != GFS2_I(sdp->sd_rindex))
+ gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh);
return error;
}
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 6798755..4a45633 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -11,6 +11,7 @@
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
+#include <linux/namei.h>
#include <linux/crc32.h>
#include "gfs2.h"
@@ -34,15 +35,23 @@
static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
{
- struct dentry *parent = dget_parent(dentry);
- struct gfs2_sbd *sdp = GFS2_SB(parent->d_inode);
- struct gfs2_inode *dip = GFS2_I(parent->d_inode);
- struct inode *inode = dentry->d_inode;
+ struct dentry *parent;
+ struct gfs2_sbd *sdp;
+ struct gfs2_inode *dip;
+ struct inode *inode;
struct gfs2_holder d_gh;
struct gfs2_inode *ip = NULL;
int error;
int had_lock = 0;
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ parent = dget_parent(dentry);
+ sdp = GFS2_SB(parent->d_inode);
+ dip = GFS2_I(parent->d_inode);
+ inode = dentry->d_inode;
+
if (inode) {
if (is_bad_inode(inode))
goto invalid;
@@ -100,13 +109,14 @@ fail:
return 0;
}
-static int gfs2_dhash(struct dentry *dentry, struct qstr *str)
+static int gfs2_dhash(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *str)
{
str->hash = gfs2_disk_hash(str->name, str->len);
return 0;
}
-static int gfs2_dentry_delete(struct dentry *dentry)
+static int gfs2_dentry_delete(const struct dentry *dentry)
{
struct gfs2_inode *ginode;
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 06d5827..97012ec 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -130,7 +130,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1));
if (!IS_ERR(dentry))
- dentry->d_op = &gfs2_dops;
+ d_set_d_op(dentry, &gfs2_dops);
return dentry;
}
@@ -138,10 +138,8 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
struct gfs2_inum_host *inum)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
- struct gfs2_holder i_gh;
struct inode *inode;
struct dentry *dentry;
- int error;
inode = gfs2_ilookup(sb, inum->no_addr);
if (inode) {
@@ -152,52 +150,16 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
goto out_inode;
}
- error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops,
- LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
- if (error)
- return ERR_PTR(error);
-
- error = gfs2_check_blk_type(sdp, inum->no_addr, GFS2_BLKST_DINODE);
- if (error)
- goto fail;
-
- inode = gfs2_inode_lookup(sb, DT_UNKNOWN, inum->no_addr, 0);
- if (IS_ERR(inode)) {
- error = PTR_ERR(inode);
- goto fail;
- }
-
- error = gfs2_inode_refresh(GFS2_I(inode));
- if (error) {
- iput(inode);
- goto fail;
- }
-
- /* Pick up the works we bypass in gfs2_inode_lookup */
- if (inode->i_state & I_NEW)
- gfs2_set_iop(inode);
-
- if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
- iput(inode);
- goto fail;
- }
-
- error = -EIO;
- if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) {
- iput(inode);
- goto fail;
- }
-
- gfs2_glock_dq_uninit(&i_gh);
+ inode = gfs2_lookup_by_inum(sdp, inum->no_addr, &inum->no_formal_ino,
+ GFS2_BLKST_DINODE);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
out_inode:
dentry = d_obtain_alias(inode);
if (!IS_ERR(dentry))
- dentry->d_op = &gfs2_dops;
+ d_set_d_op(dentry, &gfs2_dops);
return dentry;
-fail:
- gfs2_glock_dq_uninit(&i_gh);
- return ERR_PTR(error);
}
static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid,
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index aa99647..fca6689 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -241,7 +241,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
!capable(CAP_LINUX_IMMUTABLE))
goto out;
if (!IS_IMMUTABLE(inode)) {
- error = gfs2_permission(inode, MAY_WRITE);
+ error = gfs2_permission(inode, MAY_WRITE, 0);
if (error)
goto out;
}
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 8777885..08a8beb 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -541,21 +541,6 @@ out_locked:
spin_unlock(&gl->gl_spin);
}
-static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
- unsigned int req_state,
- unsigned int flags)
-{
- int ret = LM_OUT_ERROR;
-
- if (!sdp->sd_lockstruct.ls_ops->lm_lock)
- return req_state == LM_ST_UNLOCKED ? 0 : req_state;
-
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
- ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock,
- req_state, flags);
- return ret;
-}
-
/**
* do_xmote - Calls the DLM to change the state of a lock
* @gl: The lock state
@@ -575,13 +560,14 @@ __acquires(&gl->gl_spin)
lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
LM_FLAG_PRIORITY);
- BUG_ON(gl->gl_state == target);
- BUG_ON(gl->gl_state == gl->gl_target);
+ GLOCK_BUG_ON(gl, gl->gl_state == target);
+ GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
glops->go_inval) {
set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
do_error(gl, 0); /* Fail queued try locks */
}
+ gl->gl_req = target;
spin_unlock(&gl->gl_spin);
if (glops->go_xmote_th)
glops->go_xmote_th(gl);
@@ -594,15 +580,17 @@ __acquires(&gl->gl_spin)
gl->gl_state == LM_ST_DEFERRED) &&
!(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
lck_flags |= LM_FLAG_TRY_1CB;
- ret = gfs2_lm_lock(sdp, gl, target, lck_flags);
- if (!(ret & LM_OUT_ASYNC)) {
- finish_xmote(gl, ret);
+ if (sdp->sd_lockstruct.ls_ops->lm_lock) {
+ /* lock_dlm */
+ ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
+ GLOCK_BUG_ON(gl, ret);
+ } else { /* lock_nolock */
+ finish_xmote(gl, target);
if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
gfs2_glock_put(gl);
- } else {
- GLOCK_BUG_ON(gl, ret != LM_OUT_ASYNC);
}
+
spin_lock(&gl->gl_spin);
}
@@ -686,21 +674,20 @@ static void delete_work_func(struct work_struct *work)
{
struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
struct gfs2_sbd *sdp = gl->gl_sbd;
- struct gfs2_inode *ip = NULL;
+ struct gfs2_inode *ip;
struct inode *inode;
- u64 no_addr = 0;
+ u64 no_addr = gl->gl_name.ln_number;
+
+ ip = gl->gl_object;
+ /* Note: Unsafe to dereference ip as we don't hold right refs/locks */
- spin_lock(&gl->gl_spin);
- ip = (struct gfs2_inode *)gl->gl_object;
if (ip)
- no_addr = ip->i_no_addr;
- spin_unlock(&gl->gl_spin);
- if (ip) {
inode = gfs2_ilookup(sdp->sd_vfs, no_addr);
- if (inode) {
- d_prune_aliases(inode);
- iput(inode);
- }
+ else
+ inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
+ if (inode && !IS_ERR(inode)) {
+ d_prune_aliases(inode);
+ iput(inode);
}
gfs2_glock_put(gl);
}
@@ -952,17 +939,22 @@ int gfs2_glock_wait(struct gfs2_holder *gh)
void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
+
if (seq) {
struct gfs2_glock_iter *gi = seq->private;
vsprintf(gi->string, fmt, args);
seq_printf(seq, gi->string);
} else {
- printk(KERN_ERR " ");
- vprintk(fmt, args);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_ERR " %pV", &vaf);
}
+
va_end(args);
}
@@ -1362,24 +1354,28 @@ static int gfs2_should_freeze(const struct gfs2_glock *gl)
* @gl: Pointer to the glock
* @ret: The return value from the dlm
*
+ * The gl_reply field is under the gl_spin lock so that it is ok
+ * to use a bitfield shared with other glock state fields.
*/
void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
{
struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
+ spin_lock(&gl->gl_spin);
gl->gl_reply = ret;
if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
- spin_lock(&gl->gl_spin);
if (gfs2_should_freeze(gl)) {
set_bit(GLF_FROZEN, &gl->gl_flags);
spin_unlock(&gl->gl_spin);
return;
}
- spin_unlock(&gl->gl_spin);
}
+
+ spin_unlock(&gl->gl_spin);
set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
+ smp_wmb();
gfs2_glock_hold(gl);
if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
gfs2_glock_put(gl);
@@ -1627,18 +1623,17 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags)
static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
{
struct task_struct *gh_owner = NULL;
- char buffer[KSYM_SYMBOL_LEN];
char flags_buf[32];
- sprint_symbol(buffer, gh->gh_ip);
if (gh->gh_owner_pid)
gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
- gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %s\n",
- state2str(gh->gh_state),
- hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
- gh->gh_error,
- gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
- gh_owner ? gh_owner->comm : "(ended)", buffer);
+ gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
+ state2str(gh->gh_state),
+ hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
+ gh->gh_error,
+ gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
+ gh_owner ? gh_owner->comm : "(ended)",
+ (void *)gh->gh_ip);
return 0;
}
@@ -1783,12 +1778,13 @@ int __init gfs2_glock_init(void)
}
#endif
- glock_workqueue = alloc_workqueue("glock_workqueue", WQ_RESCUER |
+ glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
WQ_HIGHPRI | WQ_FREEZEABLE, 0);
if (IS_ERR(glock_workqueue))
return PTR_ERR(glock_workqueue);
- gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_RESCUER |
- WQ_FREEZEABLE, 0);
+ gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
+ WQ_MEM_RECLAIM | WQ_FREEZEABLE,
+ 0);
if (IS_ERR(gfs2_delete_workqueue)) {
destroy_workqueue(glock_workqueue);
return PTR_ERR(gfs2_delete_workqueue);
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index db1c26d..691851c 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -87,11 +87,10 @@ enum {
#define GL_ASYNC 0x00000040
#define GL_EXACT 0x00000080
#define GL_SKIP 0x00000100
-#define GL_ATIME 0x00000200
#define GL_NOCACHE 0x00000400
/*
- * lm_lock() and lm_async_cb return flags
+ * lm_async_cb return flags
*
* LM_OUT_ST_MASK
* Masks the lower two bits of lock state in the returned value.
@@ -99,15 +98,11 @@ enum {
* LM_OUT_CANCELED
* The lock request was canceled.
*
- * LM_OUT_ASYNC
- * The result of the request will be returned in an LM_CB_ASYNC callback.
- *
*/
#define LM_OUT_ST_MASK 0x00000003
#define LM_OUT_CANCELED 0x00000008
-#define LM_OUT_ASYNC 0x00000080
-#define LM_OUT_ERROR 0x00000100
+#define LM_OUT_ERROR 0x00000004
/*
* lm_recovery_done() messages
@@ -124,25 +119,12 @@ struct lm_lockops {
void (*lm_unmount) (struct gfs2_sbd *sdp);
void (*lm_withdraw) (struct gfs2_sbd *sdp);
void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl);
- unsigned int (*lm_lock) (struct gfs2_glock *gl,
- unsigned int req_state, unsigned int flags);
+ int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
+ unsigned int flags);
void (*lm_cancel) (struct gfs2_glock *gl);
const match_table_t *lm_tokens;
};
-#define LM_FLAG_TRY 0x00000001
-#define LM_FLAG_TRY_1CB 0x00000002
-#define LM_FLAG_NOEXP 0x00000004
-#define LM_FLAG_ANY 0x00000008
-#define LM_FLAG_PRIORITY 0x00000010
-
-#define GL_ASYNC 0x00000040
-#define GL_EXACT 0x00000080
-#define GL_SKIP 0x00000100
-#define GL_NOCACHE 0x00000400
-
-#define GLR_TRYFAILED 13
-
extern struct workqueue_struct *gfs2_delete_workqueue;
static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
{
@@ -212,6 +194,8 @@ int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
+
+__attribute__ ((format(printf, 2, 3)))
void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
/**
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 0d149dc..263561b 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -325,7 +325,6 @@ static void trans_go_sync(struct gfs2_glock *gl)
if (gl->gl_state != LM_ST_UNLOCKED &&
test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
- flush_workqueue(gfs2_delete_workqueue);
gfs2_meta_syncfs(sdp);
gfs2_log_shutdown(sdp);
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 764fbb4..a79790c 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -11,6 +11,7 @@
#define __INCORE_DOT_H__
#include <linux/fs.h>
+#include <linux/kobject.h>
#include <linux/workqueue.h>
#include <linux/dlm.h>
#include <linux/buffer_head.h>
@@ -207,12 +208,14 @@ struct gfs2_glock {
spinlock_t gl_spin;
- unsigned int gl_state;
- unsigned int gl_target;
- unsigned int gl_reply;
+ /* State fields protected by gl_spin */
+ unsigned int gl_state:2, /* Current state */
+ gl_target:2, /* Target state */
+ gl_demote_state:2, /* State requested by remote node */
+ gl_req:2, /* State in last dlm request */
+ gl_reply:8; /* Last reply from the dlm */
+
unsigned int gl_hash;
- unsigned int gl_req;
- unsigned int gl_demote_state; /* state requested by remote node */
unsigned long gl_demote_time; /* time of first demote request */
struct list_head gl_holders;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 06370f8..2232b3c 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -73,49 +73,6 @@ static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
}
-struct gfs2_skip_data {
- u64 no_addr;
- int skipped;
-};
-
-static int iget_skip_test(struct inode *inode, void *opaque)
-{
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_skip_data *data = opaque;
-
- if (ip->i_no_addr == data->no_addr) {
- if (inode->i_state & (I_FREEING|I_WILL_FREE)){
- data->skipped = 1;
- return 0;
- }
- return 1;
- }
- return 0;
-}
-
-static int iget_skip_set(struct inode *inode, void *opaque)
-{
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_skip_data *data = opaque;
-
- if (data->skipped)
- return 1;
- inode->i_ino = (unsigned long)(data->no_addr);
- ip->i_no_addr = data->no_addr;
- return 0;
-}
-
-static struct inode *gfs2_iget_skip(struct super_block *sb,
- u64 no_addr)
-{
- struct gfs2_skip_data data;
- unsigned long hash = (unsigned long)no_addr;
-
- data.no_addr = no_addr;
- data.skipped = 0;
- return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data);
-}
-
/**
* GFS2 lookup code fills in vfs inode contents based on info obtained
* from directory entry inside gfs2_inode_lookup(). This has caused issues
@@ -243,93 +200,54 @@ fail:
return ERR_PTR(error);
}
-/**
- * gfs2_process_unlinked_inode - Lookup an unlinked inode for reclamation
- * and try to reclaim it by doing iput.
- *
- * This function assumes no rgrp locks are currently held.
- *
- * @sb: The super block
- * no_addr: The inode number
- *
- */
-
-void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
+struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
+ u64 *no_formal_ino, unsigned int blktype)
{
- struct gfs2_sbd *sdp;
- struct gfs2_inode *ip;
- struct gfs2_glock *io_gl = NULL;
- int error;
- struct gfs2_holder gh;
+ struct super_block *sb = sdp->sd_vfs;
+ struct gfs2_holder i_gh;
struct inode *inode;
+ int error;
- inode = gfs2_iget_skip(sb, no_addr);
-
- if (!inode)
- return;
-
- /* If it's not a new inode, someone's using it, so leave it alone. */
- if (!(inode->i_state & I_NEW)) {
- iput(inode);
- return;
- }
-
- ip = GFS2_I(inode);
- sdp = GFS2_SB(inode);
- ip->i_no_formal_ino = -1;
+ error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops,
+ LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+ if (error)
+ return ERR_PTR(error);
- error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
- if (unlikely(error))
+ error = gfs2_check_blk_type(sdp, no_addr, blktype);
+ if (error)
goto fail;
- ip->i_gl->gl_object = ip;
-
- error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
- if (unlikely(error))
- goto fail_put;
- set_bit(GIF_INVALID, &ip->i_flags);
- error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, LM_FLAG_TRY | GL_EXACT,
- &ip->i_iopen_gh);
- if (unlikely(error))
- goto fail_iopen;
+ inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0);
+ if (IS_ERR(inode))
+ goto fail;
- ip->i_iopen_gh.gh_gl->gl_object = ip;
- gfs2_glock_put(io_gl);
- io_gl = NULL;
+ error = gfs2_inode_refresh(GFS2_I(inode));
+ if (error)
+ goto fail_iput;
- inode->i_mode = DT2IF(DT_UNKNOWN);
+ /* Pick up the works we bypass in gfs2_inode_lookup */
+ if (inode->i_state & I_NEW)
+ gfs2_set_iop(inode);
- /*
- * We must read the inode in order to work out its type in
- * this case. Note that this doesn't happen often as we normally
- * know the type beforehand. This code path only occurs during
- * unlinked inode recovery (where it is safe to do this glock,
- * which is not true in the general case).
- */
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY,
- &gh);
- if (unlikely(error))
- goto fail_glock;
+ /* Two extra checks for NFS only */
+ if (no_formal_ino) {
+ error = -ESTALE;
+ if (GFS2_I(inode)->i_no_formal_ino != *no_formal_ino)
+ goto fail_iput;
- /* Inode is now uptodate */
- gfs2_glock_dq_uninit(&gh);
- gfs2_set_iop(inode);
+ error = -EIO;
+ if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM)
+ goto fail_iput;
- /* The iput will cause it to be deleted. */
- iput(inode);
- return;
+ error = 0;
+ }
-fail_glock:
- gfs2_glock_dq(&ip->i_iopen_gh);
-fail_iopen:
- if (io_gl)
- gfs2_glock_put(io_gl);
-fail_put:
- ip->i_gl->gl_object = NULL;
- gfs2_glock_put(ip->i_gl);
fail:
- iget_failed(inode);
- return;
+ gfs2_glock_dq_uninit(&i_gh);
+ return error ? ERR_PTR(error) : inode;
+fail_iput:
+ iput(inode);
+ goto fail;
}
static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
@@ -591,7 +509,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
}
if (!is_root) {
- error = gfs2_permission(dir, MAY_EXEC);
+ error = gfs2_permission(dir, MAY_EXEC, 0);
if (error)
goto out;
}
@@ -621,7 +539,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
{
int error;
- error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
+ error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
if (error)
return error;
@@ -998,17 +916,8 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
if (error)
return error;
- if ((attr->ia_valid & ATTR_SIZE) &&
- attr->ia_size != i_size_read(inode)) {
- error = vmtruncate(inode, attr->ia_size);
- if (error)
- return error;
- }
-
setattr_copy(inode, attr);
mark_inode_dirty(inode);
-
- gfs2_assert_warn(GFS2_SB(inode), !error);
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 6720d7d..732a183 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -99,7 +99,9 @@ err:
extern void gfs2_set_iop(struct inode *inode);
extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
u64 no_addr, u64 no_formal_ino);
-extern void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr);
+extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
+ u64 *no_formal_ino,
+ unsigned int blktype);
extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
extern int gfs2_inode_refresh(struct gfs2_inode *ip);
@@ -111,7 +113,7 @@ extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
extern struct inode *gfs2_createi(struct gfs2_holder *ghs,
const struct qstr *name,
unsigned int mode, dev_t dev);
-extern int gfs2_permission(struct inode *inode, int mask);
+extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags);
extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 1c09425..6e493ae 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -146,15 +146,13 @@ static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
return lkf;
}
-static unsigned int gdlm_lock(struct gfs2_glock *gl,
- unsigned int req_state, unsigned int flags)
+static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
+ unsigned int flags)
{
struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
- int error;
int req;
u32 lkf;
- gl->gl_req = req_state;
req = make_mode(req_state);
lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req);
@@ -162,13 +160,8 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl,
* Submit the actual lock request.
*/
- error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname,
- GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
- if (error == -EAGAIN)
- return 0;
- if (error)
- return LM_OUT_ERROR;
- return LM_OUT_ASYNC;
+ return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname,
+ GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
}
static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index cade1ac..2aeabd4 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -440,7 +440,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
iput(inode);
return -ENOMEM;
}
- dentry->d_op = &gfs2_dops;
+ d_set_d_op(dentry, &gfs2_dops);
*dptr = dentry;
return 0;
}
@@ -1250,12 +1250,11 @@ static int test_gfs2_super(struct super_block *s, void *ptr)
}
/**
- * gfs2_get_sb - Get the GFS2 superblock
+ * gfs2_mount - Get the GFS2 superblock
* @fs_type: The GFS2 filesystem type
* @flags: Mount flags
* @dev_name: The name of the device
* @data: The mount arguments
- * @mnt: The vfsmnt for this mount
*
* Q. Why not use get_sb_bdev() ?
* A. We need to select one of two root directories to mount, independent
@@ -1264,8 +1263,8 @@ static int test_gfs2_super(struct super_block *s, void *ptr)
* Returns: 0 or -ve on error
*/
-static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
{
struct block_device *bdev;
struct super_block *s;
@@ -1279,7 +1278,7 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
bdev = open_bdev_exclusive(dev_name, mode, fs_type);
if (IS_ERR(bdev))
- return PTR_ERR(bdev);
+ return ERR_CAST(bdev);
/*
* once the super is inserted into the list by sget, s_umount
@@ -1298,6 +1297,9 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
if (IS_ERR(s))
goto error_bdev;
+ if (s->s_root)
+ close_bdev_exclusive(bdev, mode);
+
memset(&args, 0, sizeof(args));
args.ar_quota = GFS2_QUOTA_DEFAULT;
args.ar_data = GFS2_DATA_DEFAULT;
@@ -1309,17 +1311,13 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
error = gfs2_mount_args(&args, data);
if (error) {
printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
- if (s->s_root)
- goto error_super;
- deactivate_locked_super(s);
- return error;
+ goto error_super;
}
if (s->s_root) {
error = -EBUSY;
if ((flags ^ s->s_flags) & MS_RDONLY)
goto error_super;
- close_bdev_exclusive(bdev, mode);
} else {
char b[BDEVNAME_SIZE];
@@ -1328,27 +1326,24 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
sb_set_blocksize(s, block_size(bdev));
error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0);
- if (error) {
- deactivate_locked_super(s);
- return error;
- }
+ if (error)
+ goto error_super;
s->s_flags |= MS_ACTIVE;
bdev->bd_super = s;
}
sdp = s->s_fs_info;
- mnt->mnt_sb = s;
if (args.ar_meta)
- mnt->mnt_root = dget(sdp->sd_master_dir);
+ return dget(sdp->sd_master_dir);
else
- mnt->mnt_root = dget(sdp->sd_root_dir);
- return 0;
+ return dget(sdp->sd_root_dir);
error_super:
deactivate_locked_super(s);
+ return ERR_PTR(error);
error_bdev:
close_bdev_exclusive(bdev, mode);
- return error;
+ return ERR_PTR(error);
}
static int set_meta_super(struct super_block *s, void *ptr)
@@ -1356,8 +1351,8 @@ static int set_meta_super(struct super_block *s, void *ptr)
return -EINVAL;
}
-static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
struct super_block *s;
struct gfs2_sbd *sdp;
@@ -1368,23 +1363,21 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
if (error) {
printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n",
dev_name, error);
- return error;
+ return ERR_PTR(error);
}
s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super,
path.dentry->d_inode->i_sb->s_bdev);
path_put(&path);
if (IS_ERR(s)) {
printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
- return PTR_ERR(s);
+ return ERR_CAST(s);
}
if ((flags ^ s->s_flags) & MS_RDONLY) {
deactivate_locked_super(s);
- return -EBUSY;
+ return ERR_PTR(-EBUSY);
}
sdp = s->s_fs_info;
- mnt->mnt_sb = s;
- mnt->mnt_root = dget(sdp->sd_master_dir);
- return 0;
+ return dget(sdp->sd_master_dir);
}
static void gfs2_kill_sb(struct super_block *sb)
@@ -1410,7 +1403,7 @@ static void gfs2_kill_sb(struct super_block *sb)
struct file_system_type gfs2_fs_type = {
.name = "gfs2",
.fs_flags = FS_REQUIRES_DEV,
- .get_sb = gfs2_get_sb,
+ .mount = gfs2_mount,
.kill_sb = gfs2_kill_sb,
.owner = THIS_MODULE,
};
@@ -1418,7 +1411,7 @@ struct file_system_type gfs2_fs_type = {
struct file_system_type gfs2meta_fs_type = {
.name = "gfs2meta",
.fs_flags = FS_REQUIRES_DEV,
- .get_sb = gfs2_get_sb_meta,
+ .mount = gfs2_mount_meta,
.owner = THIS_MODULE,
};
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 12cbea7..1501db4 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -106,7 +106,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
{
struct inode *inode = NULL;
- dentry->d_op = &gfs2_dops;
+ d_set_d_op(dentry, &gfs2_dops);
inode = gfs2_lookupi(dir, &dentry->d_name, 0);
if (inode && IS_ERR(inode))
@@ -166,7 +166,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
if (error)
goto out_child;
- error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC);
+ error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
if (error)
goto out_gunlock;
@@ -289,7 +289,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
if (IS_APPEND(&dip->i_inode))
return -EPERM;
- error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
+ error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
if (error)
return error;
@@ -822,7 +822,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
}
}
} else {
- error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC);
+ error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
if (error)
goto out_gunlock;
@@ -857,7 +857,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
/* Check out the dir to be renamed */
if (dir_rename) {
- error = gfs2_permission(odentry->d_inode, MAY_WRITE);
+ error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
if (error)
goto out_gunlock;
}
@@ -1041,13 +1041,17 @@ static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
* Returns: errno
*/
-int gfs2_permission(struct inode *inode, int mask)
+int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
{
- struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_inode *ip;
struct gfs2_holder i_gh;
int error;
int unlock = 0;
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
+ ip = GFS2_I(inode);
if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
if (error)
@@ -1058,7 +1062,7 @@ int gfs2_permission(struct inode *inode, int mask)
if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
error = -EACCES;
else
- error = generic_permission(inode, mask, gfs2_check_acl);
+ error = generic_permission(inode, mask, flags, gfs2_check_acl);
if (unlock)
gfs2_glock_dq_uninit(&i_gh);
@@ -1069,7 +1073,6 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct buffer_head *dibh;
u32 ouid, ogid, nuid, ngid;
int error;
@@ -1100,25 +1103,10 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
if (error)
goto out_gunlock_q;
- error = gfs2_meta_inode_buffer(ip, &dibh);
+ error = gfs2_setattr_simple(ip, attr);
if (error)
goto out_end_trans;
- if ((attr->ia_valid & ATTR_SIZE) &&
- attr->ia_size != i_size_read(inode)) {
- int error;
-
- error = vmtruncate(inode, attr->ia_size);
- gfs2_assert_warn(sdp, !error);
- }
-
- setattr_copy(inode, attr);
- mark_inode_dirty(inode);
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(ip, dibh->b_data);
- brelse(dibh);
-
if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
gfs2_quota_change(ip, -blocks, ouid, ogid);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 58a9b99..a689901 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -631,6 +631,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
struct fs_disk_quota *fdq)
{
struct inode *inode = &ip->i_inode;
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
struct address_space *mapping = inode->i_mapping;
unsigned long index = loc >> PAGE_CACHE_SHIFT;
unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
@@ -658,13 +659,17 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
qd->qd_qb.qb_value = qp->qu_value;
if (fdq) {
if (fdq->d_fieldmask & FS_DQ_BSOFT) {
- qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit);
+ qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift);
qd->qd_qb.qb_warn = qp->qu_warn;
}
if (fdq->d_fieldmask & FS_DQ_BHARD) {
- qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit);
+ qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
qd->qd_qb.qb_limit = qp->qu_limit;
}
+ if (fdq->d_fieldmask & FS_DQ_BCOUNT) {
+ qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift);
+ qd->qd_qb.qb_value = qp->qu_value;
+ }
}
/* Write the quota into the quota file on disk */
@@ -1497,9 +1502,9 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id,
fdq->d_version = FS_DQUOT_VERSION;
fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA;
fdq->d_id = id;
- fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit);
- fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn);
- fdq->d_bcount = be64_to_cpu(qlvb->qb_value);
+ fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift;
+ fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift;
+ fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift;
gfs2_glock_dq_uninit(&q_gh);
out:
@@ -1508,7 +1513,7 @@ out:
}
/* GFS2 only supports a subset of the XFS fields */
-#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD)
+#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT)
static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
struct fs_disk_quota *fdq)
@@ -1566,11 +1571,17 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
/* If nothing has changed, this is a no-op */
if ((fdq->d_fieldmask & FS_DQ_BSOFT) &&
- (fdq->d_blk_softlimit == be64_to_cpu(qd->qd_qb.qb_warn)))
+ ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn)))
fdq->d_fieldmask ^= FS_DQ_BSOFT;
+
if ((fdq->d_fieldmask & FS_DQ_BHARD) &&
- (fdq->d_blk_hardlimit == be64_to_cpu(qd->qd_qb.qb_limit)))
+ ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit)))
fdq->d_fieldmask ^= FS_DQ_BHARD;
+
+ if ((fdq->d_fieldmask & FS_DQ_BCOUNT) &&
+ ((fdq->d_bcount >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_value)))
+ fdq->d_fieldmask ^= FS_DQ_BCOUNT;
+
if (fdq->d_fieldmask == 0)
goto out_i;
@@ -1619,4 +1630,3 @@ const struct quotactl_ops gfs2_quotactl_ops = {
.get_dqblk = gfs2_get_dqblk,
.set_dqblk = gfs2_set_dqblk,
};
-
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index bef3ab6..7293ea2 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -500,7 +500,7 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
for (rgrps = 0;; rgrps++) {
loff_t pos = rgrps * sizeof(struct gfs2_rindex);
- if (pos + sizeof(struct gfs2_rindex) >= i_size_read(inode))
+ if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
break;
error = gfs2_internal_read(ip, &ra_state, buf, &pos,
sizeof(struct gfs2_rindex));
@@ -583,7 +583,7 @@ static int read_rindex_entry(struct gfs2_inode *ip,
* Returns: 0 on successful update, error code otherwise
*/
-static int gfs2_ri_update(struct gfs2_inode *ip)
+int gfs2_ri_update(struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct inode *inode = &ip->i_inode;
@@ -614,46 +614,6 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
}
/**
- * gfs2_ri_update_special - Pull in a new resource index from the disk
- *
- * This is a special version that's safe to call from gfs2_inplace_reserve_i.
- * In this case we know that we don't have any resource groups in memory yet.
- *
- * @ip: pointer to the rindex inode
- *
- * Returns: 0 on successful update, error code otherwise
- */
-static int gfs2_ri_update_special(struct gfs2_inode *ip)
-{
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- struct inode *inode = &ip->i_inode;
- struct file_ra_state ra_state;
- struct gfs2_rgrpd *rgd;
- unsigned int max_data = 0;
- int error;
-
- file_ra_state_init(&ra_state, inode->i_mapping);
- for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
- /* Ignore partials */
- if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
- i_size_read(inode))
- break;
- error = read_rindex_entry(ip, &ra_state);
- if (error) {
- clear_rgrpdi(sdp);
- return error;
- }
- }
- list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list)
- if (rgd->rd_data > max_data)
- max_data = rgd->rd_data;
- sdp->sd_max_rg_data = max_data;
-
- sdp->sd_rindex_uptodate = 1;
- return 0;
-}
-
-/**
* gfs2_rindex_hold - Grab a lock on the rindex
* @sdp: The GFS2 superblock
* @ri_gh: the glock holder
@@ -963,17 +923,18 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
* The inode, if one has been found, in inode.
*/
-static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked,
- u64 skip)
+static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
{
u32 goal = 0, block;
u64 no_addr;
struct gfs2_sbd *sdp = rgd->rd_sbd;
unsigned int n;
+ struct gfs2_glock *gl;
+ struct gfs2_inode *ip;
+ int error;
+ int found = 0;
- for(;;) {
- if (goal >= rgd->rd_data)
- break;
+ while (goal < rgd->rd_data) {
down_write(&sdp->sd_log_flush_lock);
n = 1;
block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
@@ -990,11 +951,32 @@ static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked,
if (no_addr == skip)
continue;
*last_unlinked = no_addr;
- return no_addr;
+
+ error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl);
+ if (error)
+ continue;
+
+ /* If the inode is already in cache, we can ignore it here
+ * because the existing inode disposal code will deal with
+ * it when all refs have gone away. Accessing gl_object like
+ * this is not safe in general. Here it is ok because we do
+ * not dereference the pointer, and we only need an approx
+ * answer to whether it is NULL or not.
+ */
+ ip = gl->gl_object;
+
+ if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
+ gfs2_glock_put(gl);
+ else
+ found++;
+
+ /* Limit reclaim to sensible number of tasks */
+ if (found > 2*NR_CPUS)
+ return;
}
rgd->rd_flags &= ~GFS2_RDF_CHECK;
- return 0;
+ return;
}
/**
@@ -1075,11 +1057,9 @@ static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
* Try to acquire rgrp in way which avoids contending with others.
*
* Returns: errno
- * unlinked: the block address of an unlinked block to be reclaimed
*/
-static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
- u64 *last_unlinked)
+static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *rgd, *begin = NULL;
@@ -1089,7 +1069,6 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
int loops = 0;
int error, rg_locked;
- *unlinked = 0;
rgd = gfs2_blk2rgrpd(sdp, ip->i_goal);
while (rgd) {
@@ -1106,17 +1085,10 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
case 0:
if (try_rgrp_fit(rgd, al))
goto out;
- /* If the rg came in already locked, there's no
- way we can recover from a failed try_rgrp_unlink
- because that would require an iput which can only
- happen after the rgrp is unlocked. */
- if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK)
- *unlinked = try_rgrp_unlink(rgd, last_unlinked,
- ip->i_no_addr);
+ if (rgd->rd_flags & GFS2_RDF_CHECK)
+ try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
if (!rg_locked)
gfs2_glock_dq_uninit(&al->al_rgd_gh);
- if (*unlinked)
- return -EAGAIN;
/* fall through */
case GLR_TRYFAILED:
rgd = recent_rgrp_next(rgd);
@@ -1145,13 +1117,10 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
case 0:
if (try_rgrp_fit(rgd, al))
goto out;
- if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK)
- *unlinked = try_rgrp_unlink(rgd, last_unlinked,
- ip->i_no_addr);
+ if (rgd->rd_flags & GFS2_RDF_CHECK)
+ try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
if (!rg_locked)
gfs2_glock_dq_uninit(&al->al_rgd_gh);
- if (*unlinked)
- return -EAGAIN;
break;
case GLR_TRYFAILED:
@@ -1204,12 +1173,12 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_alloc *al = ip->i_alloc;
int error = 0;
- u64 last_unlinked = NO_BLOCK, unlinked;
+ u64 last_unlinked = NO_BLOCK;
+ int tries = 0;
if (gfs2_assert_warn(sdp, al->al_requested))
return -EINVAL;
-try_again:
if (hold_rindex) {
/* We need to hold the rindex unless the inode we're using is
the rindex itself, in which case it's already held. */
@@ -1217,32 +1186,33 @@ try_again:
error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
else if (!sdp->sd_rgrps) /* We may not have the rindex read
in, so: */
- error = gfs2_ri_update_special(ip);
+ error = gfs2_ri_update(ip);
+ if (error)
+ return error;
}
- if (error)
- return error;
+try_again:
+ do {
+ error = get_local_rgrp(ip, &last_unlinked);
+ /* If there is no space, flushing the log may release some */
+ if (error) {
+ if (ip == GFS2_I(sdp->sd_rindex) &&
+ !sdp->sd_rindex_uptodate) {
+ error = gfs2_ri_update(ip);
+ if (error)
+ return error;
+ goto try_again;
+ }
+ gfs2_log_flush(sdp, NULL);
+ }
+ } while (error && tries++ < 3);
- /* Find an rgrp suitable for allocation. If it encounters any unlinked
- dinodes along the way, error will equal -EAGAIN and unlinked will
- contains it block address. We then need to look up that inode and
- try to free it, and try the allocation again. */
- error = get_local_rgrp(ip, &unlinked, &last_unlinked);
if (error) {
if (hold_rindex && ip != GFS2_I(sdp->sd_rindex))
gfs2_glock_dq_uninit(&al->al_ri_gh);
- if (error != -EAGAIN)
- return error;
-
- gfs2_process_unlinked_inode(ip->i_inode.i_sb, unlinked);
- /* regardless of whether or not gfs2_process_unlinked_inode
- was successful, we don't want to repeat it again. */
- last_unlinked = unlinked;
- gfs2_log_flush(sdp, NULL);
- error = 0;
-
- goto try_again;
+ return error;
}
+
/* no error, so we have the rgrp set in the inode's allocation. */
al->al_file = file;
al->al_line = line;
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 0e35c04..50c2bb0 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -48,6 +48,7 @@ extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
extern void gfs2_inplace_release(struct gfs2_inode *ip);
+extern int gfs2_ri_update(struct gfs2_inode *ip);
extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 2b2c499..16c2eca 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1405,11 +1405,18 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
return &ip->i_inode;
}
-static void gfs2_destroy_inode(struct inode *inode)
+static void gfs2_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(gfs2_inode_cachep, inode);
}
+static void gfs2_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, gfs2_i_callback);
+}
+
const struct super_operations gfs2_super_ops = {
.alloc_inode = gfs2_alloc_inode,
.destroy_inode = gfs2_destroy_inode,
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 30b58f07..439b61c0 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1296,10 +1296,8 @@ fail:
int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
{
- struct inode *inode = &ip->i_inode;
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_ea_location el;
- struct buffer_head *dibh;
int error;
error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el);
@@ -1321,26 +1319,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
if (error)
return error;
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- goto out_trans_end;
-
- if ((attr->ia_valid & ATTR_SIZE) &&
- attr->ia_size != i_size_read(inode)) {
- int error;
-
- error = vmtruncate(inode, attr->ia_size);
- gfs2_assert_warn(GFS2_SB(inode), !error);
- }
-
- setattr_copy(inode, attr);
- mark_inode_dirty(inode);
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(ip, dibh->b_data);
- brelse(dibh);
-
-out_trans_end:
+ error = gfs2_setattr_simple(ip, attr);
gfs2_trans_end(sdp);
return error;
}
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 2b3b861..ea4aefe 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -25,7 +25,7 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry,
struct inode *inode = NULL;
int res;
- dentry->d_op = &hfs_dentry_operations;
+ d_set_d_op(dentry, &hfs_dentry_operations);
hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd);
hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name);
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index c8cffb8..ad97c2d 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -213,10 +213,14 @@ extern int hfs_part_find(struct super_block *, sector_t *, sector_t *);
/* string.c */
extern const struct dentry_operations hfs_dentry_operations;
-extern int hfs_hash_dentry(struct dentry *, struct qstr *);
+extern int hfs_hash_dentry(const struct dentry *, const struct inode *,
+ struct qstr *);
extern int hfs_strcmp(const unsigned char *, unsigned int,
const unsigned char *, unsigned int);
-extern int hfs_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
+extern int hfs_compare_dentry(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name);
/* trans.c */
extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *);
diff --git a/fs/hfs/string.c b/fs/hfs/string.c
index 927a5af..495a976 100644
--- a/fs/hfs/string.c
+++ b/fs/hfs/string.c
@@ -51,7 +51,8 @@ static unsigned char caseorder[256] = {
/*
* Hash a string to an integer in a case-independent way
*/
-int hfs_hash_dentry(struct dentry *dentry, struct qstr *this)
+int hfs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *this)
{
const unsigned char *name = this->name;
unsigned int hash, len = this->len;
@@ -92,21 +93,21 @@ int hfs_strcmp(const unsigned char *s1, unsigned int len1,
* Test for equality of two strings in the HFS filename character ordering.
* return 1 on failure and 0 on success
*/
-int hfs_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2)
+int hfs_compare_dentry(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
const unsigned char *n1, *n2;
- int len;
- len = s1->len;
if (len >= HFS_NAMELEN) {
- if (s2->len < HFS_NAMELEN)
+ if (name->len < HFS_NAMELEN)
return 1;
len = HFS_NAMELEN;
- } else if (len != s2->len)
+ } else if (len != name->len)
return 1;
- n1 = s1->name;
- n2 = s2->name;
+ n1 = str;
+ n2 = name->name;
while (len--) {
if (caseorder[*n1++] != caseorder[*n2++])
return 1;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 6ee1586..0bef62a 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -167,11 +167,18 @@ static struct inode *hfs_alloc_inode(struct super_block *sb)
return i ? &i->vfs_inode : NULL;
}
-static void hfs_destroy_inode(struct inode *inode)
+static void hfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(hfs_inode_cachep, HFS_I(inode));
}
+static void hfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, hfs_i_callback);
+}
+
static const struct super_operations hfs_super_operations = {
.alloc_inode = hfs_alloc_inode,
.destroy_inode = hfs_destroy_inode,
@@ -427,7 +434,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
if (!sb->s_root)
goto bail_iput;
- sb->s_root->d_op = &hfs_dentry_operations;
+ d_set_d_op(sb->s_root, &hfs_dentry_operations);
/* everything's okay */
return 0;
@@ -441,17 +448,16 @@ bail:
return res;
}
-static int hfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data,
- struct vfsmount *mnt)
+static struct dentry *hfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, hfs_fill_super, mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, hfs_fill_super);
}
static struct file_system_type hfs_fs_type = {
.owner = THIS_MODULE,
.name = "hfs",
- .get_sb = hfs_get_sb,
+ .mount = hfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c
index 7478f5c..19cf291 100644
--- a/fs/hfs/sysdep.c
+++ b/fs/hfs/sysdep.c
@@ -8,15 +8,20 @@
* This file contains the code to do various system dependent things.
*/
+#include <linux/namei.h>
#include "hfs_fs.h"
/* dentry case-handling: just lowercase everything */
static int hfs_revalidate_dentry(struct dentry *dentry, struct nameidata *nd)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode;
int diff;
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ inode = dentry->d_inode;
if(!inode)
return 1;
diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c
index d182438..5d799c1 100644
--- a/fs/hfsplus/bfind.c
+++ b/fs/hfsplus/bfind.c
@@ -22,7 +22,8 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd)
return -ENOMEM;
fd->search_key = ptr;
fd->key = ptr + tree->max_key_len + 2;
- dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0));
+ dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n",
+ tree->cnid, __builtin_return_address(0));
mutex_lock(&tree->tree_lock);
return 0;
}
@@ -31,7 +32,8 @@ void hfs_find_exit(struct hfs_find_data *fd)
{
hfs_bnode_put(fd->bnode);
kfree(fd->search_key);
- dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0));
+ dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n",
+ fd->tree->cnid, __builtin_return_address(0));
mutex_unlock(&fd->tree->tree_lock);
fd->tree = NULL;
}
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c
index ad57f59..1cad80c 100644
--- a/fs/hfsplus/bitmap.c
+++ b/fs/hfsplus/bitmap.c
@@ -15,7 +15,8 @@
#define PAGE_CACHE_BITS (PAGE_CACHE_SIZE * 8)
-int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *max)
+int hfsplus_block_allocate(struct super_block *sb, u32 size,
+ u32 offset, u32 *max)
{
struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
struct page *page;
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 29da657..1c42cc5 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -42,7 +42,7 @@ void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len)
u16 hfs_bnode_read_u16(struct hfs_bnode *node, int off)
{
__be16 data;
- // optimize later...
+ /* TODO: optimize later... */
hfs_bnode_read(node, &data, off, 2);
return be16_to_cpu(data);
}
@@ -50,7 +50,7 @@ u16 hfs_bnode_read_u16(struct hfs_bnode *node, int off)
u8 hfs_bnode_read_u8(struct hfs_bnode *node, int off)
{
u8 data;
- // optimize later...
+ /* TODO: optimize later... */
hfs_bnode_read(node, &data, off, 1);
return data;
}
@@ -96,7 +96,7 @@ void hfs_bnode_write(struct hfs_bnode *node, void *buf, int off, int len)
void hfs_bnode_write_u16(struct hfs_bnode *node, int off, u16 data)
{
__be16 v = cpu_to_be16(data);
- // optimize later...
+ /* TODO: optimize later... */
hfs_bnode_write(node, &v, off, 2);
}
@@ -212,7 +212,8 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
dst_page--;
}
src -= len;
- memmove(kmap(*dst_page) + src, kmap(*src_page) + src, len);
+ memmove(kmap(*dst_page) + src,
+ kmap(*src_page) + src, len);
kunmap(*src_page);
set_page_dirty(*dst_page);
kunmap(*dst_page);
@@ -250,14 +251,16 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
if (src == dst) {
l = min(len, (int)PAGE_CACHE_SIZE - src);
- memmove(kmap(*dst_page) + src, kmap(*src_page) + src, l);
+ memmove(kmap(*dst_page) + src,
+ kmap(*src_page) + src, l);
kunmap(*src_page);
set_page_dirty(*dst_page);
kunmap(*dst_page);
while ((len -= l) != 0) {
l = min(len, (int)PAGE_CACHE_SIZE);
- memmove(kmap(*++dst_page), kmap(*++src_page), l);
+ memmove(kmap(*++dst_page),
+ kmap(*++src_page), l);
kunmap(*src_page);
set_page_dirty(*dst_page);
kunmap(*dst_page);
@@ -268,7 +271,8 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
do {
src_ptr = kmap(*src_page) + src;
dst_ptr = kmap(*dst_page) + dst;
- if (PAGE_CACHE_SIZE - src < PAGE_CACHE_SIZE - dst) {
+ if (PAGE_CACHE_SIZE - src <
+ PAGE_CACHE_SIZE - dst) {
l = PAGE_CACHE_SIZE - src;
src = 0;
dst += l;
@@ -340,7 +344,8 @@ void hfs_bnode_unlink(struct hfs_bnode *node)
return;
tmp->next = node->next;
cnid = cpu_to_be32(tmp->next);
- hfs_bnode_write(tmp, &cnid, offsetof(struct hfs_bnode_desc, next), 4);
+ hfs_bnode_write(tmp, &cnid,
+ offsetof(struct hfs_bnode_desc, next), 4);
hfs_bnode_put(tmp);
} else if (node->type == HFS_NODE_LEAF)
tree->leaf_head = node->next;
@@ -351,15 +356,15 @@ void hfs_bnode_unlink(struct hfs_bnode *node)
return;
tmp->prev = node->prev;
cnid = cpu_to_be32(tmp->prev);
- hfs_bnode_write(tmp, &cnid, offsetof(struct hfs_bnode_desc, prev), 4);
+ hfs_bnode_write(tmp, &cnid,
+ offsetof(struct hfs_bnode_desc, prev), 4);
hfs_bnode_put(tmp);
} else if (node->type == HFS_NODE_LEAF)
tree->leaf_tail = node->prev;
- // move down?
- if (!node->prev && !node->next) {
- printk(KERN_DEBUG "hfs_btree_del_level\n");
- }
+ /* move down? */
+ if (!node->prev && !node->next)
+ dprint(DBG_BNODE_MOD, "hfs_btree_del_level\n");
if (!node->parent) {
tree->root = 0;
tree->depth = 0;
@@ -379,16 +384,16 @@ struct hfs_bnode *hfs_bnode_findhash(struct hfs_btree *tree, u32 cnid)
struct hfs_bnode *node;
if (cnid >= tree->node_count) {
- printk(KERN_ERR "hfs: request for non-existent node %d in B*Tree\n", cnid);
+ printk(KERN_ERR "hfs: request for non-existent node "
+ "%d in B*Tree\n",
+ cnid);
return NULL;
}
for (node = tree->node_hash[hfs_bnode_hash(cnid)];
- node; node = node->next_hash) {
- if (node->this == cnid) {
+ node; node = node->next_hash)
+ if (node->this == cnid)
return node;
- }
- }
return NULL;
}
@@ -402,7 +407,9 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
loff_t off;
if (cnid >= tree->node_count) {
- printk(KERN_ERR "hfs: request for non-existent node %d in B*Tree\n", cnid);
+ printk(KERN_ERR "hfs: request for non-existent node "
+ "%d in B*Tree\n",
+ cnid);
return NULL;
}
@@ -429,7 +436,8 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
} else {
spin_unlock(&tree->hash_lock);
kfree(node);
- wait_event(node2->lock_wq, !test_bit(HFS_BNODE_NEW, &node2->flags));
+ wait_event(node2->lock_wq,
+ !test_bit(HFS_BNODE_NEW, &node2->flags));
return node2;
}
spin_unlock(&tree->hash_lock);
@@ -483,7 +491,8 @@ struct hfs_bnode *hfs_bnode_find(struct hfs_btree *tree, u32 num)
if (node) {
hfs_bnode_get(node);
spin_unlock(&tree->hash_lock);
- wait_event(node->lock_wq, !test_bit(HFS_BNODE_NEW, &node->flags));
+ wait_event(node->lock_wq,
+ !test_bit(HFS_BNODE_NEW, &node->flags));
if (test_bit(HFS_BNODE_ERROR, &node->flags))
goto node_error;
return node;
@@ -497,7 +506,8 @@ struct hfs_bnode *hfs_bnode_find(struct hfs_btree *tree, u32 num)
if (!test_bit(HFS_BNODE_NEW, &node->flags))
return node;
- desc = (struct hfs_bnode_desc *)(kmap(node->page[0]) + node->page_offset);
+ desc = (struct hfs_bnode_desc *)(kmap(node->page[0]) +
+ node->page_offset);
node->prev = be32_to_cpu(desc->prev);
node->next = be32_to_cpu(desc->next);
node->num_recs = be16_to_cpu(desc->num_recs);
@@ -556,11 +566,13 @@ node_error:
void hfs_bnode_free(struct hfs_bnode *node)
{
- //int i;
+#if 0
+ int i;
- //for (i = 0; i < node->tree->pages_per_bnode; i++)
- // if (node->page[i])
- // page_cache_release(node->page[i]);
+ for (i = 0; i < node->tree->pages_per_bnode; i++)
+ if (node->page[i])
+ page_cache_release(node->page[i]);
+#endif
kfree(node);
}
@@ -607,7 +619,8 @@ void hfs_bnode_get(struct hfs_bnode *node)
if (node) {
atomic_inc(&node->refcnt);
dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n",
- node->tree->cnid, node->this, atomic_read(&node->refcnt));
+ node->tree->cnid, node->this,
+ atomic_read(&node->refcnt));
}
}
@@ -619,7 +632,8 @@ void hfs_bnode_put(struct hfs_bnode *node)
int i;
dprint(DBG_BNODE_REFS, "put_node(%d:%d): %d\n",
- node->tree->cnid, node->this, atomic_read(&node->refcnt));
+ node->tree->cnid, node->this,
+ atomic_read(&node->refcnt));
BUG_ON(!atomic_read(&node->refcnt));
if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock))
return;
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 2f39d05..2312de3 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -39,7 +39,8 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec)
!(node->tree->attributes & HFS_TREE_VARIDXKEYS)) {
retval = node->tree->max_key_len + 2;
} else {
- recoff = hfs_bnode_read_u16(node, node->tree->node_size - (rec + 1) * 2);
+ recoff = hfs_bnode_read_u16(node,
+ node->tree->node_size - (rec + 1) * 2);
if (!recoff)
return 0;
@@ -84,7 +85,8 @@ again:
end_rec_off = tree->node_size - (node->num_recs + 1) * 2;
end_off = hfs_bnode_read_u16(node, end_rec_off);
end_rec_off -= 2;
- dprint(DBG_BNODE_MOD, "insert_rec: %d, %d, %d, %d\n", rec, size, end_off, end_rec_off);
+ dprint(DBG_BNODE_MOD, "insert_rec: %d, %d, %d, %d\n",
+ rec, size, end_off, end_rec_off);
if (size > end_rec_off - end_off) {
if (new_node)
panic("not enough room!\n");
@@ -99,7 +101,9 @@ again:
}
node->num_recs++;
/* write new last offset */
- hfs_bnode_write_u16(node, offsetof(struct hfs_bnode_desc, num_recs), node->num_recs);
+ hfs_bnode_write_u16(node,
+ offsetof(struct hfs_bnode_desc, num_recs),
+ node->num_recs);
hfs_bnode_write_u16(node, end_rec_off, end_off + size);
data_off = end_off;
data_rec_off = end_rec_off + 2;
@@ -151,7 +155,8 @@ skip:
if (tree->attributes & HFS_TREE_VARIDXKEYS)
key_len = be16_to_cpu(fd->search_key->key_len) + 2;
else {
- fd->search_key->key_len = cpu_to_be16(tree->max_key_len);
+ fd->search_key->key_len =
+ cpu_to_be16(tree->max_key_len);
key_len = tree->max_key_len + 2;
}
goto again;
@@ -180,7 +185,8 @@ again:
mark_inode_dirty(tree->inode);
}
hfs_bnode_dump(node);
- dprint(DBG_BNODE_MOD, "remove_rec: %d, %d\n", fd->record, fd->keylength + fd->entrylength);
+ dprint(DBG_BNODE_MOD, "remove_rec: %d, %d\n",
+ fd->record, fd->keylength + fd->entrylength);
if (!--node->num_recs) {
hfs_bnode_unlink(node);
if (!node->parent)
@@ -194,7 +200,9 @@ again:
__hfs_brec_find(node, fd);
goto again;
}
- hfs_bnode_write_u16(node, offsetof(struct hfs_bnode_desc, num_recs), node->num_recs);
+ hfs_bnode_write_u16(node,
+ offsetof(struct hfs_bnode_desc, num_recs),
+ node->num_recs);
if (rec_off == end_off)
goto skip;
@@ -364,7 +372,8 @@ again:
newkeylen = hfs_bnode_read_u16(node, 14) + 2;
else
fd->keylength = newkeylen = tree->max_key_len + 2;
- dprint(DBG_BNODE_MOD, "update_rec: %d, %d, %d\n", rec, fd->keylength, newkeylen);
+ dprint(DBG_BNODE_MOD, "update_rec: %d, %d, %d\n",
+ rec, fd->keylength, newkeylen);
rec_off = tree->node_size - (rec + 2) * 2;
end_rec_off = tree->node_size - (parent->num_recs + 1) * 2;
@@ -375,7 +384,7 @@ again:
end_off = hfs_bnode_read_u16(parent, end_rec_off);
if (end_rec_off - end_off < diff) {
- printk(KERN_DEBUG "hfs: splitting index node...\n");
+ dprint(DBG_BNODE_MOD, "hfs: splitting index node.\n");
fd->bnode = parent;
new_node = hfs_bnode_split(fd);
if (IS_ERR(new_node))
@@ -383,7 +392,8 @@ again:
parent = fd->bnode;
rec = fd->record;
rec_off = tree->node_size - (rec + 2) * 2;
- end_rec_off = tree->node_size - (parent->num_recs + 1) * 2;
+ end_rec_off = tree->node_size -
+ (parent->num_recs + 1) * 2;
}
}
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 22e4d4e..21023d9 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -51,7 +51,8 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
goto free_inode;
/* Load the header */
- head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc));
+ head = (struct hfs_btree_header_rec *)(kmap(page) +
+ sizeof(struct hfs_bnode_desc));
tree->root = be32_to_cpu(head->root);
tree->leaf_count = be32_to_cpu(head->leaf_count);
tree->leaf_head = be32_to_cpu(head->leaf_head);
@@ -115,7 +116,9 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
tree->node_size_shift = ffs(size) - 1;
- tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ tree->pages_per_bnode =
+ (tree->node_size + PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT;
kunmap(page);
page_cache_release(page);
@@ -144,8 +147,10 @@ void hfs_btree_close(struct hfs_btree *tree)
while ((node = tree->node_hash[i])) {
tree->node_hash[i] = node->next_hash;
if (atomic_read(&node->refcnt))
- printk(KERN_CRIT "hfs: node %d:%d still has %d user(s)!\n",
- node->tree->cnid, node->this, atomic_read(&node->refcnt));
+ printk(KERN_CRIT "hfs: node %d:%d "
+ "still has %d user(s)!\n",
+ node->tree->cnid, node->this,
+ atomic_read(&node->refcnt));
hfs_bnode_free(node);
tree->node_hash_cnt--;
}
@@ -166,7 +171,8 @@ void hfs_btree_write(struct hfs_btree *tree)
return;
/* Load the header */
page = node->page[0];
- head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc));
+ head = (struct hfs_btree_header_rec *)(kmap(page) +
+ sizeof(struct hfs_bnode_desc));
head->root = cpu_to_be32(tree->root);
head->leaf_count = cpu_to_be32(tree->leaf_count);
@@ -272,7 +278,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
tree->free_nodes--;
mark_inode_dirty(tree->inode);
hfs_bnode_put(node);
- return hfs_bnode_create(tree, idx);
+ return hfs_bnode_create(tree,
+ idx);
}
}
}
@@ -287,7 +294,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
kunmap(*pagep);
nidx = node->next;
if (!nidx) {
- printk(KERN_DEBUG "hfs: create new bmap node...\n");
+ dprint(DBG_BNODE_MOD, "hfs: create new bmap node.\n");
next_node = hfs_bmap_new_bmap(node, idx);
} else
next_node = hfs_bnode_find(tree, nidx);
@@ -329,7 +336,9 @@ void hfs_bmap_free(struct hfs_bnode *node)
hfs_bnode_put(node);
if (!i) {
/* panic */;
- printk(KERN_CRIT "hfs: unable to free bnode %u. bmap not found!\n", node->this);
+ printk(KERN_CRIT "hfs: unable to free bnode %u. "
+ "bmap not found!\n",
+ node->this);
return;
}
node = hfs_bnode_find(tree, i);
@@ -337,7 +346,9 @@ void hfs_bmap_free(struct hfs_bnode *node)
return;
if (node->type != HFS_NODE_MAP) {
/* panic */;
- printk(KERN_CRIT "hfs: invalid bmap found! (%u,%d)\n", node->this, node->type);
+ printk(KERN_CRIT "hfs: invalid bmap found! "
+ "(%u,%d)\n",
+ node->this, node->type);
hfs_bnode_put(node);
return;
}
@@ -350,7 +361,9 @@ void hfs_bmap_free(struct hfs_bnode *node)
m = 1 << (~nidx & 7);
byte = data[off];
if (!(byte & m)) {
- printk(KERN_CRIT "hfs: trying to free free bnode %u(%d)\n", node->this, node->type);
+ printk(KERN_CRIT "hfs: trying to free free bnode "
+ "%u(%d)\n",
+ node->this, node->type);
kunmap(page);
hfs_bnode_put(node);
return;
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 8af45fc..b4ba1b3 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -91,7 +91,8 @@ void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms)
perms->dev = 0;
}
-static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct inode *inode)
+static int hfsplus_cat_build_record(hfsplus_cat_entry *entry,
+ u32 cnid, struct inode *inode)
{
struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
@@ -128,20 +129,32 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i
if (cnid == inode->i_ino) {
hfsplus_cat_set_perms(inode, &file->permissions);
if (S_ISLNK(inode->i_mode)) {
- file->user_info.fdType = cpu_to_be32(HFSP_SYMLINK_TYPE);
- file->user_info.fdCreator = cpu_to_be32(HFSP_SYMLINK_CREATOR);
+ file->user_info.fdType =
+ cpu_to_be32(HFSP_SYMLINK_TYPE);
+ file->user_info.fdCreator =
+ cpu_to_be32(HFSP_SYMLINK_CREATOR);
} else {
- file->user_info.fdType = cpu_to_be32(sbi->type);
- file->user_info.fdCreator = cpu_to_be32(sbi->creator);
+ file->user_info.fdType =
+ cpu_to_be32(sbi->type);
+ file->user_info.fdCreator =
+ cpu_to_be32(sbi->creator);
}
- if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE)
- file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED);
+ if (HFSPLUS_FLG_IMMUTABLE &
+ (file->permissions.rootflags |
+ file->permissions.userflags))
+ file->flags |=
+ cpu_to_be16(HFSPLUS_FILE_LOCKED);
} else {
- file->user_info.fdType = cpu_to_be32(HFSP_HARDLINK_TYPE);
- file->user_info.fdCreator = cpu_to_be32(HFSP_HFSPLUS_CREATOR);
- file->user_info.fdFlags = cpu_to_be16(0x100);
- file->create_date = HFSPLUS_I(sbi->hidden_dir)->create_date;
- file->permissions.dev = cpu_to_be32(HFSPLUS_I(inode)->linkid);
+ file->user_info.fdType =
+ cpu_to_be32(HFSP_HARDLINK_TYPE);
+ file->user_info.fdCreator =
+ cpu_to_be32(HFSP_HFSPLUS_CREATOR);
+ file->user_info.fdFlags =
+ cpu_to_be16(0x100);
+ file->create_date =
+ HFSPLUS_I(sbi->hidden_dir)->create_date;
+ file->permissions.dev =
+ cpu_to_be32(HFSPLUS_I(inode)->linkid);
}
return sizeof(*file);
}
@@ -182,12 +195,14 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid,
return -EIO;
}
- hfsplus_cat_build_key_uni(fd->search_key, be32_to_cpu(tmp.thread.parentID),
- &tmp.thread.nodeName);
+ hfsplus_cat_build_key_uni(fd->search_key,
+ be32_to_cpu(tmp.thread.parentID),
+ &tmp.thread.nodeName);
return hfs_brec_find(fd);
}
-int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct inode *inode)
+int hfsplus_create_cat(u32 cnid, struct inode *dir,
+ struct qstr *str, struct inode *inode)
{
struct super_block *sb = dir->i_sb;
struct hfs_find_data fd;
@@ -195,13 +210,15 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct ino
int entry_size;
int err;
- dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink);
+ dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n",
+ str->name, cnid, inode->i_nlink);
hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
- entry_size = hfsplus_fill_cat_thread(sb, &entry, S_ISDIR(inode->i_mode) ?
+ entry_size = hfsplus_fill_cat_thread(sb, &entry,
+ S_ISDIR(inode->i_mode) ?
HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD,
- dir->i_ino, str);
+ dir->i_ino, str);
err = hfs_brec_find(&fd);
if (err != -ENOENT) {
if (!err)
@@ -227,7 +244,8 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct ino
dir->i_size++;
dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
- mark_inode_dirty(dir);
+ hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY);
+
hfs_find_exit(&fd);
return 0;
@@ -249,7 +267,8 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
int err, off;
u16 type;
- dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid);
+ dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n",
+ str ? str->name : NULL, cnid);
hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
if (!str) {
@@ -260,11 +279,15 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
if (err)
goto out;
- off = fd.entryoffset + offsetof(struct hfsplus_cat_thread, nodeName);
+ off = fd.entryoffset +
+ offsetof(struct hfsplus_cat_thread, nodeName);
fd.search_key->cat.parent = cpu_to_be32(dir->i_ino);
- hfs_bnode_read(fd.bnode, &fd.search_key->cat.name.length, off, 2);
+ hfs_bnode_read(fd.bnode,
+ &fd.search_key->cat.name.length, off, 2);
len = be16_to_cpu(fd.search_key->cat.name.length) * 2;
- hfs_bnode_read(fd.bnode, &fd.search_key->cat.name.unicode, off + 2, len);
+ hfs_bnode_read(fd.bnode,
+ &fd.search_key->cat.name.unicode,
+ off + 2, len);
fd.search_key->key_len = cpu_to_be16(6 + len);
} else
hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
@@ -281,7 +304,8 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
hfsplus_free_fork(sb, cnid, &fork, HFSPLUS_TYPE_DATA);
#endif
- off = fd.entryoffset + offsetof(struct hfsplus_cat_file, rsrc_fork);
+ off = fd.entryoffset +
+ offsetof(struct hfsplus_cat_file, rsrc_fork);
hfs_bnode_read(fd.bnode, &fork, off, sizeof(fork));
hfsplus_free_fork(sb, cnid, &fork, HFSPLUS_TYPE_RSRC);
}
@@ -308,7 +332,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
dir->i_size--;
dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
- mark_inode_dirty(dir);
+ hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY);
out:
hfs_find_exit(&fd);
@@ -325,7 +349,8 @@ int hfsplus_rename_cat(u32 cnid,
int entry_size, type;
int err = 0;
- dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name,
+ dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n",
+ cnid, src_dir->i_ino, src_name->name,
dst_dir->i_ino, dst_name->name);
hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd);
dst_fd = src_fd;
@@ -353,7 +378,6 @@ int hfsplus_rename_cat(u32 cnid,
goto out;
dst_dir->i_size++;
dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC;
- mark_inode_dirty(dst_dir);
/* finally remove the old entry */
hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
@@ -365,7 +389,6 @@ int hfsplus_rename_cat(u32 cnid,
goto out;
src_dir->i_size--;
src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC;
- mark_inode_dirty(src_dir);
/* remove old thread entry */
hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL);
@@ -379,7 +402,8 @@ int hfsplus_rename_cat(u32 cnid,
/* create new thread entry */
hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL);
- entry_size = hfsplus_fill_cat_thread(sb, &entry, type, dst_dir->i_ino, dst_name);
+ entry_size = hfsplus_fill_cat_thread(sb, &entry, type,
+ dst_dir->i_ino, dst_name);
err = hfs_brec_find(&dst_fd);
if (err != -ENOENT) {
if (!err)
@@ -387,6 +411,9 @@ int hfsplus_rename_cat(u32 cnid,
goto out;
}
err = hfs_brec_insert(&dst_fd, &entry, entry_size);
+
+ hfsplus_mark_inode_dirty(dst_dir, HFSPLUS_I_CAT_DIRTY);
+ hfsplus_mark_inode_dirty(src_dir, HFSPLUS_I_CAT_DIRTY);
out:
hfs_bnode_put(dst_fd.bnode);
hfs_find_exit(&src_fd);
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index e318bbc..f896dc84 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -37,7 +37,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
sb = dir->i_sb;
- dentry->d_op = &hfsplus_dentry_operations;
+ d_set_d_op(dentry, &hfsplus_dentry_operations);
dentry->d_fsdata = NULL;
hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
@@ -66,11 +66,17 @@ again:
goto fail;
}
cnid = be32_to_cpu(entry.file.id);
- if (entry.file.user_info.fdType == cpu_to_be32(HFSP_HARDLINK_TYPE) &&
- entry.file.user_info.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR) &&
- (entry.file.create_date == HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)->create_date ||
- entry.file.create_date == HFSPLUS_I(sb->s_root->d_inode)->create_date) &&
- HFSPLUS_SB(sb)->hidden_dir) {
+ if (entry.file.user_info.fdType ==
+ cpu_to_be32(HFSP_HARDLINK_TYPE) &&
+ entry.file.user_info.fdCreator ==
+ cpu_to_be32(HFSP_HFSPLUS_CREATOR) &&
+ (entry.file.create_date ==
+ HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)->
+ create_date ||
+ entry.file.create_date ==
+ HFSPLUS_I(sb->s_root->d_inode)->
+ create_date) &&
+ HFSPLUS_SB(sb)->hidden_dir) {
struct qstr str;
char name[32];
@@ -83,11 +89,13 @@ again:
linkid = 0;
} else {
dentry->d_fsdata = (void *)(unsigned long)cnid;
- linkid = be32_to_cpu(entry.file.permissions.dev);
+ linkid =
+ be32_to_cpu(entry.file.permissions.dev);
str.len = sprintf(name, "iNode%d", linkid);
str.name = name;
hfsplus_cat_build_key(sb, fd.search_key,
- HFSPLUS_SB(sb)->hidden_dir->i_ino, &str);
+ HFSPLUS_SB(sb)->hidden_dir->i_ino,
+ &str);
goto again;
}
} else if (!dentry->d_fsdata)
@@ -139,7 +147,8 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
filp->f_pos++;
/* fall through */
case 1:
- hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength);
+ hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
+ fd.entrylength);
if (be16_to_cpu(entry.type) != HFSPLUS_FOLDER_THREAD) {
printk(KERN_ERR "hfs: bad catalog folder thread\n");
err = -EIO;
@@ -169,14 +178,16 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
err = -EIO;
goto out;
}
- hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength);
+ hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
+ fd.entrylength);
type = be16_to_cpu(entry.type);
len = HFSPLUS_MAX_STRLEN;
err = hfsplus_uni2asc(sb, &fd.key->cat.name, strbuf, &len);
if (err)
goto out;
if (type == HFSPLUS_FOLDER) {
- if (fd.entrylength < sizeof(struct hfsplus_cat_folder)) {
+ if (fd.entrylength <
+ sizeof(struct hfsplus_cat_folder)) {
printk(KERN_ERR "hfs: small dir entry\n");
err = -EIO;
goto out;
@@ -202,7 +213,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
err = -EIO;
goto out;
}
- next:
+next:
filp->f_pos++;
if (filp->f_pos >= inode->i_size)
goto out;
@@ -273,7 +284,8 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
HFSPLUS_I(inode)->linkid = id;
cnid = sbi->next_cnid++;
src_dentry->d_fsdata = (void *)(unsigned long)cnid;
- res = hfsplus_create_cat(cnid, src_dir, &src_dentry->d_name, inode);
+ res = hfsplus_create_cat(cnid, src_dir,
+ &src_dentry->d_name, inode);
if (res)
/* panic? */
goto out;
@@ -317,8 +329,10 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
res = hfsplus_rename_cat(inode->i_ino,
dir, &dentry->d_name,
sbi->hidden_dir, &str);
- if (!res)
+ if (!res) {
inode->i_flags |= S_DEAD;
+ drop_nlink(inode);
+ }
goto out;
}
res = hfsplus_delete_cat(cnid, dir, &dentry->d_name);
@@ -483,6 +497,7 @@ const struct inode_operations hfsplus_dir_inode_operations = {
};
const struct file_operations hfsplus_dir_operations = {
+ .fsync = hfsplus_file_fsync,
.read = generic_read_dir,
.readdir = hfsplus_readdir,
.unlocked_ioctl = hfsplus_ioctl,
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 0c9cb18..52a0bca 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -83,7 +83,8 @@ static u32 hfsplus_ext_lastblock(struct hfsplus_extent *ext)
return be32_to_cpu(ext->start_block) + be32_to_cpu(ext->block_count);
}
-static void __hfsplus_ext_write_extent(struct inode *inode, struct hfs_find_data *fd)
+static void __hfsplus_ext_write_extent(struct inode *inode,
+ struct hfs_find_data *fd)
{
struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
int res;
@@ -95,24 +96,32 @@ static void __hfsplus_ext_write_extent(struct inode *inode, struct hfs_find_data
HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA);
res = hfs_brec_find(fd);
- if (hip->flags & HFSPLUS_FLG_EXT_NEW) {
+ if (hip->extent_state & HFSPLUS_EXT_NEW) {
if (res != -ENOENT)
return;
hfs_brec_insert(fd, hip->cached_extents,
sizeof(hfsplus_extent_rec));
- hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW);
+ hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW);
} else {
if (res)
return;
hfs_bnode_write(fd->bnode, hip->cached_extents,
fd->entryoffset, fd->entrylength);
- hip->flags &= ~HFSPLUS_FLG_EXT_DIRTY;
+ hip->extent_state &= ~HFSPLUS_EXT_DIRTY;
}
+
+ /*
+ * We can't just use hfsplus_mark_inode_dirty here, because we
+ * also get called from hfsplus_write_inode, which should not
+ * redirty the inode. Instead the callers have to be careful
+ * to explicily mark the inode dirty, too.
+ */
+ set_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags);
}
static void hfsplus_ext_write_extent_locked(struct inode *inode)
{
- if (HFSPLUS_I(inode)->flags & HFSPLUS_FLG_EXT_DIRTY) {
+ if (HFSPLUS_I(inode)->extent_state & HFSPLUS_EXT_DIRTY) {
struct hfs_find_data fd;
hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd);
@@ -144,18 +153,20 @@ static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd,
return -ENOENT;
if (fd->entrylength != sizeof(hfsplus_extent_rec))
return -EIO;
- hfs_bnode_read(fd->bnode, extent, fd->entryoffset, sizeof(hfsplus_extent_rec));
+ hfs_bnode_read(fd->bnode, extent, fd->entryoffset,
+ sizeof(hfsplus_extent_rec));
return 0;
}
-static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, struct inode *inode, u32 block)
+static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd,
+ struct inode *inode, u32 block)
{
struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
int res;
WARN_ON(!mutex_is_locked(&hip->extents_lock));
- if (hip->flags & HFSPLUS_FLG_EXT_DIRTY)
+ if (hip->extent_state & HFSPLUS_EXT_DIRTY)
__hfsplus_ext_write_extent(inode, fd);
res = __hfsplus_ext_read_extent(fd, hip->cached_extents, inode->i_ino,
@@ -164,10 +175,11 @@ static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, struct in
HFSPLUS_TYPE_DATA);
if (!res) {
hip->cached_start = be32_to_cpu(fd->key->ext.start_block);
- hip->cached_blocks = hfsplus_ext_block_count(hip->cached_extents);
+ hip->cached_blocks =
+ hfsplus_ext_block_count(hip->cached_extents);
} else {
hip->cached_start = hip->cached_blocks = 0;
- hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW);
+ hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW);
}
return res;
}
@@ -197,6 +209,7 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
int res = -EIO;
u32 ablock, dblock, mask;
+ int was_dirty = 0;
int shift;
/* Convert inode block to disk allocation block */
@@ -223,27 +236,37 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
return -EIO;
mutex_lock(&hip->extents_lock);
+
+ /*
+ * hfsplus_ext_read_extent will write out a cached extent into
+ * the extents btree. In that case we may have to mark the inode
+ * dirty even for a pure read of an extent here.
+ */
+ was_dirty = (hip->extent_state & HFSPLUS_EXT_DIRTY);
res = hfsplus_ext_read_extent(inode, ablock);
- if (!res) {
- dblock = hfsplus_ext_find_block(hip->cached_extents,
- ablock - hip->cached_start);
- } else {
+ if (res) {
mutex_unlock(&hip->extents_lock);
return -EIO;
}
+ dblock = hfsplus_ext_find_block(hip->cached_extents,
+ ablock - hip->cached_start);
mutex_unlock(&hip->extents_lock);
done:
- dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock);
+ dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n",
+ inode->i_ino, (long long)iblock, dblock);
mask = (1 << sbi->fs_shift) - 1;
- map_bh(bh_result, sb, (dblock << sbi->fs_shift) + sbi->blockoffset + (iblock & mask));
+ map_bh(bh_result, sb,
+ (dblock << sbi->fs_shift) + sbi->blockoffset +
+ (iblock & mask));
if (create) {
set_buffer_new(bh_result);
hip->phys_size += sb->s_blocksize;
hip->fs_blocks++;
inode_add_bytes(inode, sb->s_blocksize);
- mark_inode_dirty(inode);
}
+ if (create || was_dirty)
+ mark_inode_dirty(inode);
return 0;
}
@@ -326,7 +349,8 @@ found:
}
}
-int hfsplus_free_fork(struct super_block *sb, u32 cnid, struct hfsplus_fork_raw *fork, int type)
+int hfsplus_free_fork(struct super_block *sb, u32 cnid,
+ struct hfsplus_fork_raw *fork, int type)
{
struct hfs_find_data fd;
hfsplus_extent_rec ext_entry;
@@ -373,12 +397,13 @@ int hfsplus_file_extend(struct inode *inode)
u32 start, len, goal;
int res;
- if (sbi->alloc_file->i_size * 8 <
- sbi->total_blocks - sbi->free_blocks + 8) {
- // extend alloc file
- printk(KERN_ERR "hfs: extend alloc file! (%Lu,%u,%u)\n",
- sbi->alloc_file->i_size * 8,
- sbi->total_blocks, sbi->free_blocks);
+ if (sbi->total_blocks - sbi->free_blocks + 8 >
+ sbi->alloc_file->i_size * 8) {
+ /* extend alloc file */
+ printk(KERN_ERR "hfs: extend alloc file! "
+ "(%llu,%u,%u)\n",
+ sbi->alloc_file->i_size * 8,
+ sbi->total_blocks, sbi->free_blocks);
return -ENOSPC;
}
@@ -429,7 +454,7 @@ int hfsplus_file_extend(struct inode *inode)
start, len);
if (!res) {
hfsplus_dump_extent(hip->cached_extents);
- hip->flags |= HFSPLUS_FLG_EXT_DIRTY;
+ hip->extent_state |= HFSPLUS_EXT_DIRTY;
hip->cached_blocks += len;
} else if (res == -ENOSPC)
goto insert_extent;
@@ -438,7 +463,7 @@ out:
mutex_unlock(&hip->extents_lock);
if (!res) {
hip->alloc_blocks += len;
- mark_inode_dirty(inode);
+ hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ALLOC_DIRTY);
}
return res;
@@ -450,7 +475,7 @@ insert_extent:
hip->cached_extents[0].start_block = cpu_to_be32(start);
hip->cached_extents[0].block_count = cpu_to_be32(len);
hfsplus_dump_extent(hip->cached_extents);
- hip->flags |= HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW;
+ hip->extent_state |= HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW;
hip->cached_start = hip->alloc_blocks;
hip->cached_blocks = len;
@@ -466,8 +491,9 @@ void hfsplus_file_truncate(struct inode *inode)
u32 alloc_cnt, blk_cnt, start;
int res;
- dprint(DBG_INODE, "truncate: %lu, %Lu -> %Lu\n",
- inode->i_ino, (long long)hip->phys_size, inode->i_size);
+ dprint(DBG_INODE, "truncate: %lu, %llu -> %llu\n",
+ inode->i_ino, (long long)hip->phys_size,
+ inode->i_size);
if (inode->i_size > hip->phys_size) {
struct address_space *mapping = inode->i_mapping;
@@ -481,7 +507,8 @@ void hfsplus_file_truncate(struct inode *inode)
&page, &fsdata);
if (res)
return;
- res = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
+ res = pagecache_write_end(NULL, mapping, size,
+ 0, 0, page, fsdata);
if (res < 0)
return;
mark_inode_dirty(inode);
@@ -513,12 +540,12 @@ void hfsplus_file_truncate(struct inode *inode)
alloc_cnt - start, alloc_cnt - blk_cnt);
hfsplus_dump_extent(hip->cached_extents);
if (blk_cnt > start) {
- hip->flags |= HFSPLUS_FLG_EXT_DIRTY;
+ hip->extent_state |= HFSPLUS_EXT_DIRTY;
break;
}
alloc_cnt = start;
hip->cached_start = hip->cached_blocks = 0;
- hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW);
+ hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW);
hfs_brec_remove(&fd);
}
hfs_find_exit(&fd);
@@ -527,7 +554,8 @@ void hfsplus_file_truncate(struct inode *inode)
hip->alloc_blocks = blk_cnt;
out:
hip->phys_size = inode->i_size;
- hip->fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+ hip->fs_blocks = (inode->i_size + sb->s_blocksize - 1) >>
+ sb->s_blocksize_bits;
inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits);
- mark_inode_dirty(inode);
+ hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ALLOC_DIRTY);
}
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index cb3653e..d685752 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -23,13 +23,16 @@
#define DBG_EXTENT 0x00000020
#define DBG_BITMAP 0x00000040
-//#define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD)
-//#define DBG_MASK (DBG_BNODE_MOD|DBG_CAT_MOD|DBG_INODE)
-//#define DBG_MASK (DBG_CAT_MOD|DBG_BNODE_REFS|DBG_INODE|DBG_EXTENT)
+#if 0
+#define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD)
+#define DBG_MASK (DBG_BNODE_MOD|DBG_CAT_MOD|DBG_INODE)
+#define DBG_MASK (DBG_CAT_MOD|DBG_BNODE_REFS|DBG_INODE|DBG_EXTENT)
+#endif
#define DBG_MASK (0)
#define dprint(flg, fmt, args...) \
- if (flg & DBG_MASK) printk(fmt , ## args)
+ if (flg & DBG_MASK) \
+ printk(fmt , ## args)
/* Runtime config options */
#define HFSPLUS_DEF_CR_TYPE 0x3F3F3F3F /* '????' */
@@ -37,7 +40,8 @@
#define HFSPLUS_TYPE_DATA 0x00
#define HFSPLUS_TYPE_RSRC 0xFF
-typedef int (*btree_keycmp)(const hfsplus_btree_key *, const hfsplus_btree_key *);
+typedef int (*btree_keycmp)(const hfsplus_btree_key *,
+ const hfsplus_btree_key *);
#define NODE_HASH_SIZE 256
@@ -61,7 +65,6 @@ struct hfs_btree {
unsigned int max_key_len;
unsigned int depth;
- //unsigned int map1_size, map_size;
struct mutex tree_lock;
unsigned int pages_per_bnode;
@@ -107,8 +110,8 @@ struct hfsplus_vh;
struct hfs_btree;
struct hfsplus_sb_info {
- struct buffer_head *s_vhbh;
struct hfsplus_vh *s_vhdr;
+ struct hfsplus_vh *s_backup_vhdr;
struct hfs_btree *ext_tree;
struct hfs_btree *cat_tree;
struct hfs_btree *attr_tree;
@@ -118,7 +121,8 @@ struct hfsplus_sb_info {
/* Runtime variables */
u32 blockoffset;
- u32 sect_count;
+ sector_t part_start;
+ sector_t sect_count;
int fs_shift;
/* immutable data from the volume header */
@@ -155,6 +159,12 @@ struct hfsplus_sb_info {
#define HFSPLUS_SB_FORCE 2
#define HFSPLUS_SB_HFSX 3
#define HFSPLUS_SB_CASEFOLD 4
+#define HFSPLUS_SB_NOBARRIER 5
+
+static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb)
+{
+ return sb->s_fs_info;
+}
struct hfsplus_inode_info {
@@ -170,7 +180,7 @@ struct hfsplus_inode_info {
u32 cached_blocks;
hfsplus_extent_rec first_extents;
hfsplus_extent_rec cached_extents;
- unsigned long flags;
+ unsigned int extent_state;
struct mutex extents_lock;
/*
@@ -185,6 +195,11 @@ struct hfsplus_inode_info {
u32 linkid;
/*
+ * Accessed using atomic bitops.
+ */
+ unsigned long flags;
+
+ /*
* Protected by i_mutex.
*/
sector_t fs_blocks;
@@ -195,12 +210,34 @@ struct hfsplus_inode_info {
struct inode vfs_inode;
};
-#define HFSPLUS_FLG_RSRC 0x0001
-#define HFSPLUS_FLG_EXT_DIRTY 0x0002
-#define HFSPLUS_FLG_EXT_NEW 0x0004
+#define HFSPLUS_EXT_DIRTY 0x0001
+#define HFSPLUS_EXT_NEW 0x0002
+
+#define HFSPLUS_I_RSRC 0 /* represents a resource fork */
+#define HFSPLUS_I_CAT_DIRTY 1 /* has changes in the catalog tree */
+#define HFSPLUS_I_EXT_DIRTY 2 /* has changes in the extent tree */
+#define HFSPLUS_I_ALLOC_DIRTY 3 /* has changes in the allocation file */
+
+#define HFSPLUS_IS_RSRC(inode) \
+ test_bit(HFSPLUS_I_RSRC, &HFSPLUS_I(inode)->flags)
+
+static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode)
+{
+ return list_entry(inode, struct hfsplus_inode_info, vfs_inode);
+}
-#define HFSPLUS_IS_DATA(inode) (!(HFSPLUS_I(inode)->flags & HFSPLUS_FLG_RSRC))
-#define HFSPLUS_IS_RSRC(inode) (HFSPLUS_I(inode)->flags & HFSPLUS_FLG_RSRC)
+/*
+ * Mark an inode dirty, and also mark the btree in which the
+ * specific type of metadata is stored.
+ * For data or metadata that gets written back by into the catalog btree
+ * by hfsplus_write_inode a plain mark_inode_dirty call is enough.
+ */
+static inline void hfsplus_mark_inode_dirty(struct inode *inode,
+ unsigned int flag)
+{
+ set_bit(flag, &HFSPLUS_I(inode)->flags);
+ mark_inode_dirty(inode);
+}
struct hfs_find_data {
/* filled by caller */
@@ -318,9 +355,12 @@ int hfs_brec_read(struct hfs_find_data *, void *, int);
int hfs_brec_goto(struct hfs_find_data *, int);
/* catalog.c */
-int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *);
-int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *);
-void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *, u32, struct qstr *);
+int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *,
+ const hfsplus_btree_key *);
+int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *,
+ const hfsplus_btree_key *);
+void hfsplus_cat_build_key(struct super_block *sb,
+ hfsplus_btree_key *, u32, struct qstr *);
int hfsplus_find_cat(struct super_block *, u32, struct hfs_find_data *);
int hfsplus_create_cat(u32, struct inode *, struct qstr *, struct inode *);
int hfsplus_delete_cat(u32, struct inode *, struct qstr *);
@@ -336,7 +376,8 @@ extern const struct file_operations hfsplus_dir_operations;
int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *);
void hfsplus_ext_write_extent(struct inode *);
int hfsplus_get_block(struct inode *, sector_t, struct buffer_head *, int);
-int hfsplus_free_fork(struct super_block *, u32, struct hfsplus_fork_raw *, int);
+int hfsplus_free_fork(struct super_block *, u32,
+ struct hfsplus_fork_raw *, int);
int hfsplus_file_extend(struct inode *);
void hfsplus_file_truncate(struct inode *);
@@ -351,6 +392,7 @@ int hfsplus_cat_read_inode(struct inode *, struct hfs_find_data *);
int hfsplus_cat_write_inode(struct inode *);
struct inode *hfsplus_new_inode(struct super_block *, int);
void hfsplus_delete_inode(struct inode *);
+int hfsplus_file_fsync(struct file *file, int datasync);
/* ioctl.c */
long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
@@ -362,6 +404,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
/* options.c */
int hfsplus_parse_options(char *, struct hfsplus_sb_info *);
+int hfsplus_parse_options_remount(char *input, int *force);
void hfsplus_fill_defaults(struct hfsplus_sb_info *);
int hfsplus_show_options(struct seq_file *, struct vfsmount *);
@@ -375,45 +418,26 @@ extern u16 hfsplus_decompose_table[];
extern u16 hfsplus_compose_table[];
/* unicode.c */
-int hfsplus_strcasecmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *);
-int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *);
-int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *);
-int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int);
-int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str);
-int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2);
+int hfsplus_strcasecmp(const struct hfsplus_unistr *,
+ const struct hfsplus_unistr *);
+int hfsplus_strcmp(const struct hfsplus_unistr *,
+ const struct hfsplus_unistr *);
+int hfsplus_uni2asc(struct super_block *,
+ const struct hfsplus_unistr *, char *, int *);
+int hfsplus_asc2uni(struct super_block *,
+ struct hfsplus_unistr *, const char *, int);
+int hfsplus_hash_dentry(const struct dentry *dentry,
+ const struct inode *inode, struct qstr *str);
+int hfsplus_compare_dentry(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name);
/* wrapper.c */
int hfsplus_read_wrapper(struct super_block *);
-
int hfs_part_find(struct super_block *, sector_t *, sector_t *);
-
-/* access macros */
-static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb)
-{
- return sb->s_fs_info;
-}
-
-static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode)
-{
- return list_entry(inode, struct hfsplus_inode_info, vfs_inode);
-}
-
-#define sb_bread512(sb, sec, data) ({ \
- struct buffer_head *__bh; \
- sector_t __block; \
- loff_t __start; \
- int __offset; \
- \
- __start = (loff_t)(sec) << HFSPLUS_SECTOR_SHIFT;\
- __block = __start >> (sb)->s_blocksize_bits; \
- __offset = __start & ((sb)->s_blocksize - 1); \
- __bh = sb_bread((sb), __block); \
- if (likely(__bh != NULL)) \
- data = (void *)(__bh->b_data + __offset);\
- else \
- data = NULL; \
- __bh; \
-})
+int hfsplus_submit_bio(struct block_device *bdev, sector_t sector,
+ void *data, int rw);
/* time macros */
#define __hfsp_mt2ut(t) (be32_to_cpu(t) - 2082844800U)
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h
index 6892899..927cdd6 100644
--- a/fs/hfsplus/hfsplus_raw.h
+++ b/fs/hfsplus/hfsplus_raw.h
@@ -36,7 +36,8 @@
#define HFSP_WRAPOFF_EMBEDSIG 0x7C
#define HFSP_WRAPOFF_EMBEDEXT 0x7E
-#define HFSP_HIDDENDIR_NAME "\xe2\x90\x80\xe2\x90\x80\xe2\x90\x80\xe2\x90\x80HFS+ Private Data"
+#define HFSP_HIDDENDIR_NAME \
+ "\xe2\x90\x80\xe2\x90\x80\xe2\x90\x80\xe2\x90\x80HFS+ Private Data"
#define HFSP_HARDLINK_TYPE 0x686c6e6b /* 'hlnk' */
#define HFSP_HFSPLUS_CREATOR 0x6866732b /* 'hfs+' */
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 8afd7e8..a8df651 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -8,6 +8,7 @@
* Inode handling routines
*/
+#include <linux/blkdev.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
@@ -77,7 +78,8 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask)
if (!tree)
return 0;
if (tree->node_size >= PAGE_CACHE_SIZE) {
- nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT);
+ nidx = page->index >>
+ (tree->node_size_shift - PAGE_CACHE_SHIFT);
spin_lock(&tree->hash_lock);
node = hfs_bnode_findhash(tree, nidx);
if (!node)
@@ -90,7 +92,8 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask)
}
spin_unlock(&tree->hash_lock);
} else {
- nidx = page->index << (PAGE_CACHE_SHIFT - tree->node_size_shift);
+ nidx = page->index <<
+ (PAGE_CACHE_SHIFT - tree->node_size_shift);
i = 1 << (PAGE_CACHE_SHIFT - tree->node_size_shift);
spin_lock(&tree->hash_lock);
do {
@@ -166,8 +169,8 @@ const struct dentry_operations hfsplus_dentry_operations = {
.d_compare = hfsplus_compare_dentry,
};
-static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dentry,
- struct nameidata *nd)
+static struct dentry *hfsplus_file_lookup(struct inode *dir,
+ struct dentry *dentry, struct nameidata *nd)
{
struct hfs_find_data fd;
struct super_block *sb = dir->i_sb;
@@ -190,7 +193,9 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
inode->i_ino = dir->i_ino;
INIT_LIST_HEAD(&hip->open_dir_list);
mutex_init(&hip->extents_lock);
- hip->flags = HFSPLUS_FLG_RSRC;
+ hip->extent_state = 0;
+ hip->flags = 0;
+ set_bit(HFSPLUS_I_RSRC, &hip->flags);
hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
err = hfsplus_find_cat(sb, dir->i_ino, &fd);
@@ -219,7 +224,8 @@ out:
return NULL;
}
-static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, int dir)
+static void hfsplus_get_perms(struct inode *inode,
+ struct hfsplus_perm *perms, int dir)
{
struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
u16 mode;
@@ -302,29 +308,41 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
return 0;
}
-static int hfsplus_file_fsync(struct file *filp, int datasync)
+int hfsplus_file_fsync(struct file *file, int datasync)
{
- struct inode *inode = filp->f_mapping->host;
- struct super_block * sb;
- int ret, err;
-
- /* sync the inode to buffers */
- ret = write_inode_now(inode, 0);
-
- /* sync the superblock to buffers */
- sb = inode->i_sb;
- if (sb->s_dirt) {
- if (!(sb->s_flags & MS_RDONLY))
- hfsplus_sync_fs(sb, 1);
- else
- sb->s_dirt = 0;
+ struct inode *inode = file->f_mapping->host;
+ struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
+ struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
+ int error = 0, error2;
+
+ /*
+ * Sync inode metadata into the catalog and extent trees.
+ */
+ sync_inode_metadata(inode, 1);
+
+ /*
+ * And explicitly write out the btrees.
+ */
+ if (test_and_clear_bit(HFSPLUS_I_CAT_DIRTY, &hip->flags))
+ error = filemap_write_and_wait(sbi->cat_tree->inode->i_mapping);
+
+ if (test_and_clear_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags)) {
+ error2 =
+ filemap_write_and_wait(sbi->ext_tree->inode->i_mapping);
+ if (!error)
+ error = error2;
}
- /* .. finally sync the buffers to disk */
- err = sync_blockdev(sb->s_bdev);
- if (!ret)
- ret = err;
- return ret;
+ if (test_and_clear_bit(HFSPLUS_I_ALLOC_DIRTY, &hip->flags)) {
+ error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping);
+ if (!error)
+ error = error2;
+ }
+
+ if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
+ blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+
+ return error;
}
static const struct inode_operations hfsplus_file_inode_operations = {
@@ -337,7 +355,7 @@ static const struct inode_operations hfsplus_file_inode_operations = {
};
static const struct file_operations hfsplus_file_operations = {
- .llseek = generic_file_llseek,
+ .llseek = generic_file_llseek,
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.write = do_sync_write,
@@ -370,6 +388,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
INIT_LIST_HEAD(&hip->open_dir_list);
mutex_init(&hip->extents_lock);
atomic_set(&hip->opencnt, 0);
+ hip->extent_state = 0;
hip->flags = 0;
memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec));
memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec));
@@ -457,7 +476,8 @@ void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork)
}
}
-void hfsplus_inode_write_fork(struct inode *inode, struct hfsplus_fork_raw *fork)
+void hfsplus_inode_write_fork(struct inode *inode,
+ struct hfsplus_fork_raw *fork)
{
memcpy(&fork->extents, &HFSPLUS_I(inode)->first_extents,
sizeof(hfsplus_extent_rec));
@@ -499,13 +519,14 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
hfs_bnode_read(fd->bnode, &entry, fd->entryoffset,
sizeof(struct hfsplus_cat_file));
- hfsplus_inode_read_fork(inode, HFSPLUS_IS_DATA(inode) ?
- &file->data_fork : &file->rsrc_fork);
+ hfsplus_inode_read_fork(inode, HFSPLUS_IS_RSRC(inode) ?
+ &file->rsrc_fork : &file->data_fork);
hfsplus_get_perms(inode, &file->permissions, 0);
inode->i_nlink = 1;
if (S_ISREG(inode->i_mode)) {
if (file->permissions.dev)
- inode->i_nlink = be32_to_cpu(file->permissions.dev);
+ inode->i_nlink =
+ be32_to_cpu(file->permissions.dev);
inode->i_op = &hfsplus_file_inode_operations;
inode->i_fop = &hfsplus_file_operations;
inode->i_mapping->a_ops = &hfsplus_aops;
@@ -578,7 +599,9 @@ int hfsplus_cat_write_inode(struct inode *inode)
sizeof(struct hfsplus_cat_file));
hfsplus_inode_write_fork(inode, &file->data_fork);
hfsplus_cat_set_perms(inode, &file->permissions);
- if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE)
+ if (HFSPLUS_FLG_IMMUTABLE &
+ (file->permissions.rootflags |
+ file->permissions.userflags))
file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED);
else
file->flags &= cpu_to_be16(~HFSPLUS_FILE_LOCKED);
@@ -588,6 +611,8 @@ int hfsplus_cat_write_inode(struct inode *inode)
hfs_bnode_write(fd.bnode, &entry, fd.entryoffset,
sizeof(struct hfsplus_cat_file));
}
+
+ set_bit(HFSPLUS_I_CAT_DIRTY, &HFSPLUS_I(inode)->flags);
out:
hfs_find_exit(&fd);
return 0;
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 5b4667e..508ce66 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -28,7 +28,7 @@ static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags)
if (inode->i_flags & S_IMMUTABLE)
flags |= FS_IMMUTABLE_FL;
- if (inode->i_flags |= S_APPEND)
+ if (inode->i_flags & S_APPEND)
flags |= FS_APPEND_FL;
if (hip->userflags & HFSPLUS_FLG_NODUMP)
flags |= FS_NODUMP_FL;
@@ -92,7 +92,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
mark_inode_dirty(inode);
out_unlock_inode:
- mutex_lock(&inode->i_mutex);
+ mutex_unlock(&inode->i_mutex);
out_drop_write:
mnt_drop_write(file->f_path.mnt);
out:
@@ -147,9 +147,11 @@ int hfsplus_setxattr(struct dentry *dentry, const char *name,
res = -ERANGE;
} else
res = -EOPNOTSUPP;
- if (!res)
+ if (!res) {
hfs_bnode_write(fd.bnode, &entry, fd.entryoffset,
sizeof(struct hfsplus_cat_file));
+ hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY);
+ }
out:
hfs_find_exit(&fd);
return res;
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index f9ab276..bb62a5882 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -23,6 +23,7 @@ enum {
opt_umask, opt_uid, opt_gid,
opt_part, opt_session, opt_nls,
opt_nodecompose, opt_decompose,
+ opt_barrier, opt_nobarrier,
opt_force, opt_err
};
@@ -37,6 +38,8 @@ static const match_table_t tokens = {
{ opt_nls, "nls=%s" },
{ opt_decompose, "decompose" },
{ opt_nodecompose, "nodecompose" },
+ { opt_barrier, "barrier" },
+ { opt_nobarrier, "nobarrier" },
{ opt_force, "force" },
{ opt_err, NULL }
};
@@ -65,6 +68,32 @@ static inline int match_fourchar(substring_t *arg, u32 *result)
return 0;
}
+int hfsplus_parse_options_remount(char *input, int *force)
+{
+ char *p;
+ substring_t args[MAX_OPT_ARGS];
+ int token;
+
+ if (!input)
+ return 0;
+
+ while ((p = strsep(&input, ",")) != NULL) {
+ if (!*p)
+ continue;
+
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case opt_force:
+ *force = 1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return 1;
+}
+
/* Parse options from mount. Returns 0 on failure */
/* input is the options passed to mount() as a string */
int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
@@ -136,7 +165,9 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
if (p)
sbi->nls = load_nls(p);
if (!sbi->nls) {
- printk(KERN_ERR "hfs: unable to load nls mapping \"%s\"\n", p);
+ printk(KERN_ERR "hfs: unable to load "
+ "nls mapping \"%s\"\n",
+ p);
kfree(p);
return 0;
}
@@ -148,6 +179,12 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
case opt_nodecompose:
set_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags);
break;
+ case opt_barrier:
+ clear_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags);
+ break;
+ case opt_nobarrier:
+ set_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags);
+ break;
case opt_force:
set_bit(HFSPLUS_SB_FORCE, &sbi->flags);
break;
@@ -177,7 +214,8 @@ int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt)
seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator);
if (sbi->type != HFSPLUS_DEF_CR_TYPE)
seq_printf(seq, ",type=%.4s", (char *)&sbi->type);
- seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask, sbi->uid, sbi->gid);
+ seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask,
+ sbi->uid, sbi->gid);
if (sbi->part >= 0)
seq_printf(seq, ",part=%u", sbi->part);
if (sbi->session >= 0)
@@ -186,5 +224,7 @@ int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt)
seq_printf(seq, ",nls=%s", sbi->nls->charset);
if (test_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags))
seq_printf(seq, ",nodecompose");
+ if (test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
+ seq_printf(seq, ",nobarrier");
return 0;
}
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c
index 208b16c..d66ad11 100644
--- a/fs/hfsplus/part_tbl.c
+++ b/fs/hfsplus/part_tbl.c
@@ -2,7 +2,8 @@
* linux/fs/hfsplus/part_tbl.c
*
* Copyright (C) 1996-1997 Paul H. Hargrove
- * This file may be distributed under the terms of the GNU General Public License.
+ * This file may be distributed under the terms of
+ * the GNU General Public License.
*
* Original code to handle the new style Mac partition table based on
* a patch contributed by Holger Schemel (aeglos@valinor.owl.de).
@@ -13,6 +14,7 @@
*
*/
+#include <linux/slab.h>
#include "hfsplus_fs.h"
/* offsets to various blocks */
@@ -58,77 +60,94 @@ struct new_pmap {
*/
struct old_pmap {
__be16 pdSig; /* Signature bytes */
- struct old_pmap_entry {
+ struct old_pmap_entry {
__be32 pdStart;
__be32 pdSize;
__be32 pdFSID;
} pdEntry[42];
} __packed;
+static int hfs_parse_old_pmap(struct super_block *sb, struct old_pmap *pm,
+ sector_t *part_start, sector_t *part_size)
+{
+ struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
+ int i;
+
+ for (i = 0; i < 42; i++) {
+ struct old_pmap_entry *p = &pm->pdEntry[i];
+
+ if (p->pdStart && p->pdSize &&
+ p->pdFSID == cpu_to_be32(0x54465331)/*"TFS1"*/ &&
+ (sbi->part < 0 || sbi->part == i)) {
+ *part_start += be32_to_cpu(p->pdStart);
+ *part_size = be32_to_cpu(p->pdSize);
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm,
+ sector_t *part_start, sector_t *part_size)
+{
+ struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
+ int size = be32_to_cpu(pm->pmMapBlkCnt);
+ int res;
+ int i = 0;
+
+ do {
+ if (!memcmp(pm->pmPartType, "Apple_HFS", 9) &&
+ (sbi->part < 0 || sbi->part == i)) {
+ *part_start += be32_to_cpu(pm->pmPyPartStart);
+ *part_size = be32_to_cpu(pm->pmPartBlkCnt);
+ return 0;
+ }
+
+ if (++i >= size)
+ return -ENOENT;
+
+ res = hfsplus_submit_bio(sb->s_bdev,
+ *part_start + HFS_PMAP_BLK + i,
+ pm, READ);
+ if (res)
+ return res;
+ } while (pm->pmSig == cpu_to_be16(HFS_NEW_PMAP_MAGIC));
+
+ return -ENOENT;
+}
+
/*
- * hfs_part_find()
- *
- * Parse the partition map looking for the
- * start and length of the 'part'th HFS partition.
+ * Parse the partition map looking for the start and length of a
+ * HFS/HFS+ partition.
*/
int hfs_part_find(struct super_block *sb,
- sector_t *part_start, sector_t *part_size)
+ sector_t *part_start, sector_t *part_size)
{
- struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
- struct buffer_head *bh;
- __be16 *data;
- int i, size, res;
+ void *data;
+ int res;
+
+ data = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
- res = -ENOENT;
- bh = sb_bread512(sb, *part_start + HFS_PMAP_BLK, data);
- if (!bh)
- return -EIO;
+ res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK,
+ data, READ);
+ if (res)
+ return res;
- switch (be16_to_cpu(*data)) {
+ switch (be16_to_cpu(*((__be16 *)data))) {
case HFS_OLD_PMAP_MAGIC:
- {
- struct old_pmap *pm;
- struct old_pmap_entry *p;
-
- pm = (struct old_pmap *)bh->b_data;
- p = pm->pdEntry;
- size = 42;
- for (i = 0; i < size; p++, i++) {
- if (p->pdStart && p->pdSize &&
- p->pdFSID == cpu_to_be32(0x54465331)/*"TFS1"*/ &&
- (sbi->part < 0 || sbi->part == i)) {
- *part_start += be32_to_cpu(p->pdStart);
- *part_size = be32_to_cpu(p->pdSize);
- res = 0;
- }
- }
+ res = hfs_parse_old_pmap(sb, data, part_start, part_size);
break;
- }
case HFS_NEW_PMAP_MAGIC:
- {
- struct new_pmap *pm;
-
- pm = (struct new_pmap *)bh->b_data;
- size = be32_to_cpu(pm->pmMapBlkCnt);
- for (i = 0; i < size;) {
- if (!memcmp(pm->pmPartType,"Apple_HFS", 9) &&
- (sbi->part < 0 || sbi->part == i)) {
- *part_start += be32_to_cpu(pm->pmPyPartStart);
- *part_size = be32_to_cpu(pm->pmPartBlkCnt);
- res = 0;
- break;
- }
- brelse(bh);
- bh = sb_bread512(sb, *part_start + HFS_PMAP_BLK + ++i, pm);
- if (!bh)
- return -EIO;
- if (pm->pmSig != cpu_to_be16(HFS_NEW_PMAP_MAGIC))
- break;
- }
+ res = hfs_parse_new_pmap(sb, data, part_start, part_size);
+ break;
+ default:
+ res = -ENOENT;
break;
- }
}
- brelse(bh);
+ kfree(data);
return res;
}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 9a88d75..6ee6ad2 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pagemap.h>
+#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/vfs.h>
@@ -66,6 +67,7 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino)
INIT_LIST_HEAD(&HFSPLUS_I(inode)->open_dir_list);
mutex_init(&HFSPLUS_I(inode)->extents_lock);
HFSPLUS_I(inode)->flags = 0;
+ HFSPLUS_I(inode)->extent_state = 0;
HFSPLUS_I(inode)->rsrc_inode = NULL;
atomic_set(&HFSPLUS_I(inode)->opencnt, 0);
@@ -157,45 +159,65 @@ int hfsplus_sync_fs(struct super_block *sb, int wait)
{
struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
struct hfsplus_vh *vhdr = sbi->s_vhdr;
+ int write_backup = 0;
+ int error, error2;
+
+ if (!wait)
+ return 0;
dprint(DBG_SUPER, "hfsplus_write_super\n");
- mutex_lock(&sbi->vh_mutex);
- mutex_lock(&sbi->alloc_mutex);
sb->s_dirt = 0;
+ /*
+ * Explicitly write out the special metadata inodes.
+ *
+ * While these special inodes are marked as hashed and written
+ * out peridocically by the flusher threads we redirty them
+ * during writeout of normal inodes, and thus the life lock
+ * prevents us from getting the latest state to disk.
+ */
+ error = filemap_write_and_wait(sbi->cat_tree->inode->i_mapping);
+ error2 = filemap_write_and_wait(sbi->ext_tree->inode->i_mapping);
+ if (!error)
+ error = error2;
+ error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping);
+ if (!error)
+ error = error2;
+
+ mutex_lock(&sbi->vh_mutex);
+ mutex_lock(&sbi->alloc_mutex);
vhdr->free_blocks = cpu_to_be32(sbi->free_blocks);
vhdr->next_cnid = cpu_to_be32(sbi->next_cnid);
vhdr->folder_count = cpu_to_be32(sbi->folder_count);
vhdr->file_count = cpu_to_be32(sbi->file_count);
- mark_buffer_dirty(sbi->s_vhbh);
if (test_and_clear_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags)) {
- if (sbi->sect_count) {
- struct buffer_head *bh;
- u32 block, offset;
-
- block = sbi->blockoffset;
- block += (sbi->sect_count - 2) >> (sb->s_blocksize_bits - 9);
- offset = ((sbi->sect_count - 2) << 9) & (sb->s_blocksize - 1);
- printk(KERN_DEBUG "hfs: backup: %u,%u,%u,%u\n",
- sbi->blockoffset, sbi->sect_count,
- block, offset);
- bh = sb_bread(sb, block);
- if (bh) {
- vhdr = (struct hfsplus_vh *)(bh->b_data + offset);
- if (be16_to_cpu(vhdr->signature) == HFSPLUS_VOLHEAD_SIG) {
- memcpy(vhdr, sbi->s_vhdr, sizeof(*vhdr));
- mark_buffer_dirty(bh);
- brelse(bh);
- } else
- printk(KERN_WARNING "hfs: backup not found!\n");
- }
- }
+ memcpy(sbi->s_backup_vhdr, sbi->s_vhdr, sizeof(*sbi->s_vhdr));
+ write_backup = 1;
}
+
+ error2 = hfsplus_submit_bio(sb->s_bdev,
+ sbi->part_start + HFSPLUS_VOLHEAD_SECTOR,
+ sbi->s_vhdr, WRITE_SYNC);
+ if (!error)
+ error = error2;
+ if (!write_backup)
+ goto out;
+
+ error2 = hfsplus_submit_bio(sb->s_bdev,
+ sbi->part_start + sbi->sect_count - 2,
+ sbi->s_backup_vhdr, WRITE_SYNC);
+ if (!error)
+ error2 = error;
+out:
mutex_unlock(&sbi->alloc_mutex);
mutex_unlock(&sbi->vh_mutex);
- return 0;
+
+ if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
+ blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
+
+ return error;
}
static void hfsplus_write_super(struct super_block *sb)
@@ -215,23 +237,22 @@ static void hfsplus_put_super(struct super_block *sb)
if (!sb->s_fs_info)
return;
- if (sb->s_dirt)
- hfsplus_write_super(sb);
if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) {
struct hfsplus_vh *vhdr = sbi->s_vhdr;
vhdr->modify_date = hfsp_now2mt();
vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT);
vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT);
- mark_buffer_dirty(sbi->s_vhbh);
- sync_dirty_buffer(sbi->s_vhbh);
+
+ hfsplus_sync_fs(sb, 1);
}
hfs_btree_close(sbi->cat_tree);
hfs_btree_close(sbi->ext_tree);
iput(sbi->alloc_file);
iput(sbi->hidden_dir);
- brelse(sbi->s_vhbh);
+ kfree(sbi->s_vhdr);
+ kfree(sbi->s_backup_vhdr);
unload_nls(sbi->nls);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
@@ -263,26 +284,31 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data)
return 0;
if (!(*flags & MS_RDONLY)) {
struct hfsplus_vh *vhdr = HFSPLUS_SB(sb)->s_vhdr;
- struct hfsplus_sb_info sbi;
+ int force = 0;
- memset(&sbi, 0, sizeof(struct hfsplus_sb_info));
- sbi.nls = HFSPLUS_SB(sb)->nls;
- if (!hfsplus_parse_options(data, &sbi))
+ if (!hfsplus_parse_options_remount(data, &force))
return -EINVAL;
if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) {
- printk(KERN_WARNING "hfs: filesystem was not cleanly unmounted, "
- "running fsck.hfsplus is recommended. leaving read-only.\n");
+ printk(KERN_WARNING "hfs: filesystem was "
+ "not cleanly unmounted, "
+ "running fsck.hfsplus is recommended. "
+ "leaving read-only.\n");
sb->s_flags |= MS_RDONLY;
*flags |= MS_RDONLY;
- } else if (test_bit(HFSPLUS_SB_FORCE, &sbi.flags)) {
+ } else if (force) {
/* nothing */
- } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) {
- printk(KERN_WARNING "hfs: filesystem is marked locked, leaving read-only.\n");
+ } else if (vhdr->attributes &
+ cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) {
+ printk(KERN_WARNING "hfs: filesystem is marked locked, "
+ "leaving read-only.\n");
sb->s_flags |= MS_RDONLY;
*flags |= MS_RDONLY;
- } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) {
- printk(KERN_WARNING "hfs: filesystem is marked journaled, leaving read-only.\n");
+ } else if (vhdr->attributes &
+ cpu_to_be32(HFSPLUS_VOL_JOURNALED)) {
+ printk(KERN_WARNING "hfs: filesystem is "
+ "marked journaled, "
+ "leaving read-only.\n");
sb->s_flags |= MS_RDONLY;
*flags |= MS_RDONLY;
}
@@ -372,17 +398,22 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
sb->s_maxbytes = MAX_LFS_FILESIZE;
if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) {
- printk(KERN_WARNING "hfs: Filesystem was not cleanly unmounted, "
- "running fsck.hfsplus is recommended. mounting read-only.\n");
+ printk(KERN_WARNING "hfs: Filesystem was "
+ "not cleanly unmounted, "
+ "running fsck.hfsplus is recommended. "
+ "mounting read-only.\n");
sb->s_flags |= MS_RDONLY;
} else if (test_and_clear_bit(HFSPLUS_SB_FORCE, &sbi->flags)) {
/* nothing */
} else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) {
printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n");
sb->s_flags |= MS_RDONLY;
- } else if ((vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) && !(sb->s_flags & MS_RDONLY)) {
- printk(KERN_WARNING "hfs: write access to a journaled filesystem is not supported, "
- "use the force option at your own risk, mounting read-only.\n");
+ } else if ((vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) &&
+ !(sb->s_flags & MS_RDONLY)) {
+ printk(KERN_WARNING "hfs: write access to "
+ "a journaled filesystem is not supported, "
+ "use the force option at your own risk, "
+ "mounting read-only.\n");
sb->s_flags |= MS_RDONLY;
}
@@ -419,7 +450,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
err = -ENOMEM;
goto cleanup;
}
- sb->s_root->d_op = &hfsplus_dentry_operations;
+ d_set_d_op(sb->s_root, &hfsplus_dentry_operations);
str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
str.name = HFSP_HIDDENDIR_NAME;
@@ -449,19 +480,16 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
be32_add_cpu(&vhdr->write_count, 1);
vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT);
vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT);
- mark_buffer_dirty(sbi->s_vhbh);
- sync_dirty_buffer(sbi->s_vhbh);
+ hfsplus_sync_fs(sb, 1);
if (!sbi->hidden_dir) {
- printk(KERN_DEBUG "hfs: create hidden dir...\n");
-
mutex_lock(&sbi->vh_mutex);
sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR);
hfsplus_create_cat(sbi->hidden_dir->i_ino, sb->s_root->d_inode,
&str, sbi->hidden_dir);
mutex_unlock(&sbi->vh_mutex);
- mark_inode_dirty(sbi->hidden_dir);
+ hfsplus_mark_inode_dirty(sbi->hidden_dir, HFSPLUS_I_CAT_DIRTY);
}
out:
unload_nls(sbi->nls);
@@ -488,25 +516,31 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb)
return i ? &i->vfs_inode : NULL;
}
-static void hfsplus_destroy_inode(struct inode *inode)
+static void hfsplus_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode));
}
+static void hfsplus_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, hfsplus_i_callback);
+}
+
#define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info)
-static int hfsplus_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data,
- struct vfsmount *mnt)
+static struct dentry *hfsplus_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super);
}
static struct file_system_type hfsplus_fs_type = {
.owner = THIS_MODULE,
.name = "hfsplus",
- .get_sb = hfsplus_get_sb,
+ .mount = hfsplus_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index b66d67d..a3f0bfc 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -17,14 +17,14 @@
/* Returns folded char, or 0 if ignorable */
static inline u16 case_fold(u16 c)
{
- u16 tmp;
-
- tmp = hfsplus_case_fold_table[c >> 8];
- if (tmp)
- tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
- else
- tmp = c;
- return tmp;
+ u16 tmp;
+
+ tmp = hfsplus_case_fold_table[c >> 8];
+ if (tmp)
+ tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
+ else
+ tmp = c;
+ return tmp;
}
/* Compare unicode strings, return values like normal strcmp */
@@ -118,7 +118,9 @@ static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
return NULL;
}
-int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p)
+int hfsplus_uni2asc(struct super_block *sb,
+ const struct hfsplus_unistr *ustr,
+ char *astr, int *len_p)
{
const hfsplus_unichr *ip;
struct nls_table *nls = HFSPLUS_SB(sb)->nls;
@@ -171,7 +173,8 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c
goto same;
c1 = be16_to_cpu(*ip);
if (likely(compose))
- ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c1);
+ ce1 = hfsplus_compose_lookup(
+ hfsplus_compose_table, c1);
if (ce1)
break;
switch (c0) {
@@ -199,7 +202,8 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c
if (ce2) {
i = 1;
while (i < ustrlen) {
- ce1 = hfsplus_compose_lookup(ce2, be16_to_cpu(ip[i]));
+ ce1 = hfsplus_compose_lookup(ce2,
+ be16_to_cpu(ip[i]));
if (!ce1)
break;
i++;
@@ -211,7 +215,7 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c
goto done;
}
}
- same:
+same:
switch (c0) {
case 0:
cc = 0x2400;
@@ -222,7 +226,7 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c
default:
cc = c0;
}
- done:
+done:
res = nls->uni2char(cc, op, len);
if (res < 0) {
if (res == -ENAMETOOLONG)
@@ -320,7 +324,8 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
* Composed unicode characters are decomposed and case-folding is performed
* if the appropriate bits are (un)set on the superblock.
*/
-int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str)
+int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *str)
{
struct super_block *sb = dentry->d_sb;
const char *astr;
@@ -363,9 +368,12 @@ int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str)
* Composed unicode characters are decomposed and case-folding is performed
* if the appropriate bits are (un)set on the superblock.
*/
-int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2)
+int hfsplus_compare_dentry(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- struct super_block *sb = dentry->d_sb;
+ struct super_block *sb = parent->d_sb;
int casefold, decompose, size;
int dsize1, dsize2, len1, len2;
const u16 *dstr1, *dstr2;
@@ -375,10 +383,10 @@ int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *
casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
- astr1 = s1->name;
- len1 = s1->len;
- astr2 = s2->name;
- len2 = s2->len;
+ astr1 = str;
+ len1 = len;
+ astr2 = name->name;
+ len2 = name->len;
dsize1 = dsize2 = 0;
dstr1 = dstr2 = NULL;
@@ -388,7 +396,9 @@ int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *
astr1 += size;
len1 -= size;
- if (!decompose || !(dstr1 = decompose_unichar(c, &dsize1))) {
+ if (decompose)
+ dstr1 = decompose_unichar(c, &dsize1);
+ if (!decompose || !dstr1) {
c1 = c;
dstr1 = &c1;
dsize1 = 1;
@@ -400,7 +410,9 @@ int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *
astr2 += size;
len2 -= size;
- if (!decompose || !(dstr2 = decompose_unichar(c, &dsize2))) {
+ if (decompose)
+ dstr2 = decompose_unichar(c, &dsize2);
+ if (!decompose || !dstr2) {
c2 = c;
dstr2 = &c2;
dsize2 = 1;
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 8972c20..1962317 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -24,6 +24,40 @@ struct hfsplus_wd {
u16 embed_count;
};
+static void hfsplus_end_io_sync(struct bio *bio, int err)
+{
+ if (err)
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ complete(bio->bi_private);
+}
+
+int hfsplus_submit_bio(struct block_device *bdev, sector_t sector,
+ void *data, int rw)
+{
+ DECLARE_COMPLETION_ONSTACK(wait);
+ struct bio *bio;
+
+ bio = bio_alloc(GFP_NOIO, 1);
+ bio->bi_sector = sector;
+ bio->bi_bdev = bdev;
+ bio->bi_end_io = hfsplus_end_io_sync;
+ bio->bi_private = &wait;
+
+ /*
+ * We always submit one sector at a time, so bio_add_page must not fail.
+ */
+ if (bio_add_page(bio, virt_to_page(data), HFSPLUS_SECTOR_SIZE,
+ offset_in_page(data)) != HFSPLUS_SECTOR_SIZE)
+ BUG();
+
+ submit_bio(rw, bio);
+ wait_for_completion(&wait);
+
+ if (!bio_flagged(bio, BIO_UPTODATE))
+ return -EIO;
+ return 0;
+}
+
static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
{
u32 extent;
@@ -40,12 +74,14 @@ static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
!(attrib & HFSP_WRAP_ATTRIB_SPARED))
return 0;
- wd->ablk_size = be32_to_cpu(*(__be32 *)(bufptr + HFSP_WRAPOFF_ABLKSIZE));
+ wd->ablk_size =
+ be32_to_cpu(*(__be32 *)(bufptr + HFSP_WRAPOFF_ABLKSIZE));
if (wd->ablk_size < HFSPLUS_SECTOR_SIZE)
return 0;
if (wd->ablk_size % HFSPLUS_SECTOR_SIZE)
return 0;
- wd->ablk_start = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART));
+ wd->ablk_start =
+ be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART));
extent = get_unaligned_be32(bufptr + HFSP_WRAPOFF_EMBEDEXT);
wd->embed_start = (extent >> 16) & 0xFFFF;
@@ -68,7 +104,8 @@ static int hfsplus_get_last_session(struct super_block *sb,
if (HFSPLUS_SB(sb)->session >= 0) {
te.cdte_track = HFSPLUS_SB(sb)->session;
te.cdte_format = CDROM_LBA;
- res = ioctl_by_bdev(sb->s_bdev, CDROMREADTOCENTRY, (unsigned long)&te);
+ res = ioctl_by_bdev(sb->s_bdev,
+ CDROMREADTOCENTRY, (unsigned long)&te);
if (!res && (te.cdte_ctrl & CDROM_DATA_TRACK) == 4) {
*start = (sector_t)te.cdte_addr.lba << 2;
return 0;
@@ -77,7 +114,8 @@ static int hfsplus_get_last_session(struct super_block *sb,
return -EINVAL;
}
ms_info.addr_format = CDROM_LBA;
- res = ioctl_by_bdev(sb->s_bdev, CDROMMULTISESSION, (unsigned long)&ms_info);
+ res = ioctl_by_bdev(sb->s_bdev, CDROMMULTISESSION,
+ (unsigned long)&ms_info);
if (!res && ms_info.xa_flag)
*start = (sector_t)ms_info.addr.lba << 2;
return 0;
@@ -88,100 +126,112 @@ static int hfsplus_get_last_session(struct super_block *sb,
int hfsplus_read_wrapper(struct super_block *sb)
{
struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
- struct buffer_head *bh;
- struct hfsplus_vh *vhdr;
struct hfsplus_wd wd;
sector_t part_start, part_size;
u32 blocksize;
+ int error = 0;
+ error = -EINVAL;
blocksize = sb_min_blocksize(sb, HFSPLUS_SECTOR_SIZE);
if (!blocksize)
- return -EINVAL;
+ goto out;
if (hfsplus_get_last_session(sb, &part_start, &part_size))
- return -EINVAL;
+ goto out;
if ((u64)part_start + part_size > 0x100000000ULL) {
pr_err("hfs: volumes larger than 2TB are not supported yet\n");
- return -EINVAL;
+ goto out;
}
- while (1) {
- bh = sb_bread512(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, vhdr);
- if (!bh)
- return -EIO;
-
- if (vhdr->signature == cpu_to_be16(HFSP_WRAP_MAGIC)) {
- if (!hfsplus_read_mdb(vhdr, &wd))
- goto error;
- wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT;
- part_start += wd.ablk_start + wd.embed_start * wd.ablk_size;
- part_size = wd.embed_count * wd.ablk_size;
- brelse(bh);
- bh = sb_bread512(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, vhdr);
- if (!bh)
- return -EIO;
- }
- if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIG))
- break;
- if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) {
- set_bit(HFSPLUS_SB_HFSX, &sbi->flags);
- break;
- }
- brelse(bh);
- /* check for a partition block
+ error = -ENOMEM;
+ sbi->s_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL);
+ if (!sbi->s_vhdr)
+ goto out;
+ sbi->s_backup_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL);
+ if (!sbi->s_backup_vhdr)
+ goto out_free_vhdr;
+
+reread:
+ error = hfsplus_submit_bio(sb->s_bdev,
+ part_start + HFSPLUS_VOLHEAD_SECTOR,
+ sbi->s_vhdr, READ);
+ if (error)
+ goto out_free_backup_vhdr;
+
+ error = -EINVAL;
+ switch (sbi->s_vhdr->signature) {
+ case cpu_to_be16(HFSPLUS_VOLHEAD_SIGX):
+ set_bit(HFSPLUS_SB_HFSX, &sbi->flags);
+ /*FALLTHRU*/
+ case cpu_to_be16(HFSPLUS_VOLHEAD_SIG):
+ break;
+ case cpu_to_be16(HFSP_WRAP_MAGIC):
+ if (!hfsplus_read_mdb(sbi->s_vhdr, &wd))
+ goto out;
+ wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT;
+ part_start += wd.ablk_start + wd.embed_start * wd.ablk_size;
+ part_size = wd.embed_count * wd.ablk_size;
+ goto reread;
+ default:
+ /*
+ * Check for a partition block.
+ *
* (should do this only for cdrom/loop though)
*/
if (hfs_part_find(sb, &part_start, &part_size))
- return -EINVAL;
+ goto out;
+ goto reread;
+ }
+
+ error = hfsplus_submit_bio(sb->s_bdev,
+ part_start + part_size - 2,
+ sbi->s_backup_vhdr, READ);
+ if (error)
+ goto out_free_backup_vhdr;
+
+ error = -EINVAL;
+ if (sbi->s_backup_vhdr->signature != sbi->s_vhdr->signature) {
+ printk(KERN_WARNING
+ "hfs: invalid secondary volume header\n");
+ goto out_free_backup_vhdr;
}
- blocksize = be32_to_cpu(vhdr->blocksize);
- brelse(bh);
+ blocksize = be32_to_cpu(sbi->s_vhdr->blocksize);
- /* block size must be at least as large as a sector
- * and a multiple of 2
+ /*
+ * Block size must be at least as large as a sector and a multiple of 2.
*/
- if (blocksize < HFSPLUS_SECTOR_SIZE ||
- ((blocksize - 1) & blocksize))
- return -EINVAL;
+ if (blocksize < HFSPLUS_SECTOR_SIZE || ((blocksize - 1) & blocksize))
+ goto out_free_backup_vhdr;
sbi->alloc_blksz = blocksize;
sbi->alloc_blksz_shift = 0;
while ((blocksize >>= 1) != 0)
sbi->alloc_blksz_shift++;
blocksize = min(sbi->alloc_blksz, (u32)PAGE_SIZE);
- /* align block size to block offset */
+ /*
+ * Align block size to block offset.
+ */
while (part_start & ((blocksize >> HFSPLUS_SECTOR_SHIFT) - 1))
blocksize >>= 1;
if (sb_set_blocksize(sb, blocksize) != blocksize) {
- printk(KERN_ERR "hfs: unable to set blocksize to %u!\n", blocksize);
- return -EINVAL;
+ printk(KERN_ERR "hfs: unable to set blocksize to %u!\n",
+ blocksize);
+ goto out_free_backup_vhdr;
}
sbi->blockoffset =
part_start >> (sb->s_blocksize_bits - HFSPLUS_SECTOR_SHIFT);
+ sbi->part_start = part_start;
sbi->sect_count = part_size;
sbi->fs_shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits;
-
- bh = sb_bread512(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, vhdr);
- if (!bh)
- return -EIO;
-
- /* should still be the same... */
- if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) {
- if (vhdr->signature != cpu_to_be16(HFSPLUS_VOLHEAD_SIGX))
- goto error;
- } else {
- if (vhdr->signature != cpu_to_be16(HFSPLUS_VOLHEAD_SIG))
- goto error;
- }
-
- sbi->s_vhbh = bh;
- sbi->s_vhdr = vhdr;
-
return 0;
- error:
- brelse(bh);
- return -EINVAL;
+
+out_free_backup_vhdr:
+ kfree(sbi->s_backup_vhdr);
+out_free_vhdr:
+ kfree(sbi->s_vhdr);
+out:
+ return error;
}
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index cd7c939..d3244d9 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -32,7 +32,7 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode)
-static int hostfs_d_delete(struct dentry *dentry)
+static int hostfs_d_delete(const struct dentry *dentry)
{
return 1;
}
@@ -92,12 +92,10 @@ __uml_setup("hostfs=", hostfs_args,
static char *__dentry_name(struct dentry *dentry, char *name)
{
- char *p = __dentry_path(dentry, name, PATH_MAX);
+ char *p = dentry_path_raw(dentry, name, PATH_MAX);
char *root;
size_t len;
- spin_unlock(&dcache_lock);
-
root = dentry->d_sb->s_fs_info;
len = strlen(root);
if (IS_ERR(p)) {
@@ -123,25 +121,23 @@ static char *dentry_name(struct dentry *dentry)
if (!name)
return NULL;
- spin_lock(&dcache_lock);
return __dentry_name(dentry, name); /* will unlock */
}
static char *inode_name(struct inode *ino)
{
struct dentry *dentry;
- char *name = __getname();
- if (!name)
- return NULL;
+ char *name;
- spin_lock(&dcache_lock);
- if (list_empty(&ino->i_dentry)) {
- spin_unlock(&dcache_lock);
- __putname(name);
+ dentry = d_find_alias(ino);
+ if (!dentry)
return NULL;
- }
- dentry = list_first_entry(&ino->i_dentry, struct dentry, d_alias);
- return __dentry_name(dentry, name); /* will unlock */
+
+ name = dentry_name(dentry);
+
+ dput(dentry);
+
+ return name;
}
static char *follow_link(char *link)
@@ -251,11 +247,18 @@ static void hostfs_evict_inode(struct inode *inode)
}
}
-static void hostfs_destroy_inode(struct inode *inode)
+static void hostfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kfree(HOSTFS_I(inode));
}
+static void hostfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, hostfs_i_callback);
+}
+
static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
{
const char *root_path = vfs->mnt_sb->s_fs_info;
@@ -609,7 +612,7 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
goto out_put;
d_add(dentry, inode);
- dentry->d_op = &hostfs_dentry_ops;
+ d_set_d_op(dentry, &hostfs_dentry_ops);
return NULL;
out_put:
@@ -746,11 +749,14 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
return err;
}
-int hostfs_permission(struct inode *ino, int desired)
+int hostfs_permission(struct inode *ino, int desired, unsigned int flags)
{
char *name;
int r = 0, w = 0, x = 0, err;
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
if (desired & MAY_READ) r = 1;
if (desired & MAY_WRITE) w = 1;
if (desired & MAY_EXEC) x = 1;
@@ -765,7 +771,7 @@ int hostfs_permission(struct inode *ino, int desired)
err = access_file(name, r, w, x);
__putname(name);
if (!err)
- err = generic_permission(ino, desired, NULL);
+ err = generic_permission(ino, desired, flags, NULL);
return err;
}
@@ -962,11 +968,11 @@ out:
return err;
}
-static int hostfs_read_sb(struct file_system_type *type,
+static struct dentry *hostfs_read_sb(struct file_system_type *type,
int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
- return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt);
+ return mount_nodev(type, flags, data, hostfs_fill_sb_common);
}
static void hostfs_kill_sb(struct super_block *s)
@@ -978,7 +984,7 @@ static void hostfs_kill_sb(struct super_block *s)
static struct file_system_type hostfs_type = {
.owner = THIS_MODULE,
.name = "hostfs",
- .get_sb = hostfs_read_sb,
+ .mount = hostfs_read_sb,
.kill_sb = hostfs_kill_sb,
.fs_flags = 0,
};
diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c
index eac5f96..793cb9d 100644
--- a/fs/hpfs/buffer.c
+++ b/fs/hpfs/buffer.c
@@ -14,7 +14,7 @@ void hpfs_lock_creation(struct super_block *s)
#ifdef DEBUG_LOCKS
printk("lock creation\n");
#endif
- down(&hpfs_sb(s)->hpfs_creation_de);
+ mutex_lock(&hpfs_sb(s)->hpfs_creation_de);
}
void hpfs_unlock_creation(struct super_block *s)
@@ -22,7 +22,7 @@ void hpfs_unlock_creation(struct super_block *s)
#ifdef DEBUG_LOCKS
printk("unlock creation\n");
#endif
- up(&hpfs_sb(s)->hpfs_creation_de);
+ mutex_unlock(&hpfs_sb(s)->hpfs_creation_de);
}
/* Map a sector into a buffer and return pointers to it and to the buffer. */
diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c
index 67d9d36..32c13a9 100644
--- a/fs/hpfs/dentry.c
+++ b/fs/hpfs/dentry.c
@@ -12,7 +12,8 @@
* Note: the dentry argument is the parent dentry.
*/
-static int hpfs_hash_dentry(struct dentry *dentry, struct qstr *qstr)
+static int hpfs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
unsigned long hash;
int i;
@@ -34,19 +35,25 @@ static int hpfs_hash_dentry(struct dentry *dentry, struct qstr *qstr)
return 0;
}
-static int hpfs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
+static int hpfs_compare_dentry(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- unsigned al=a->len;
- unsigned bl=b->len;
- hpfs_adjust_length(a->name, &al);
+ unsigned al = len;
+ unsigned bl = name->len;
+
+ hpfs_adjust_length(str, &al);
/*hpfs_adjust_length(b->name, &bl);*/
- /* 'a' is the qstr of an already existing dentry, so the name
- * must be valid. 'b' must be validated first.
+
+ /*
+ * 'str' is the nane of an already existing dentry, so the name
+ * must be valid. 'name' must be validated first.
*/
- if (hpfs_chk_name(b->name, &bl))
+ if (hpfs_chk_name(name->name, &bl))
return 1;
- if (hpfs_compare_names(dentry->d_sb, a->name, al, b->name, bl, 0))
+ if (hpfs_compare_names(parent->d_sb, str, al, name->name, bl, 0))
return 1;
return 0;
}
@@ -58,5 +65,5 @@ static const struct dentry_operations hpfs_dentry_operations = {
void hpfs_set_dentry_operations(struct dentry *dentry)
{
- dentry->d_op = &hpfs_dentry_operations;
+ d_set_d_op(dentry, &hpfs_dentry_operations);
}
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index b59eac0..2fee17d 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -87,7 +87,7 @@ struct hpfs_sb_info {
unsigned *sb_bmp_dir; /* main bitmap directory */
unsigned sb_c_bitmap; /* current bitmap */
unsigned sb_max_fwd_alloc; /* max forwad allocation */
- struct semaphore hpfs_creation_de; /* when creating dirents, nobody else
+ struct mutex hpfs_creation_de; /* when creating dirents, nobody else
can alloc blocks */
/*unsigned sb_mounting : 1;*/
int sb_timeshift;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 11c2b40..f4ad9e3 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -419,7 +419,7 @@ again:
unlock_kernel();
return -ENOSPC;
}
- if (generic_permission(inode, MAY_WRITE, NULL) ||
+ if (generic_permission(inode, MAY_WRITE, 0, NULL) ||
!S_ISREG(inode->i_mode) ||
get_write_access(inode)) {
d_rehash(dentry);
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index c969a1a..49935ba 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -177,11 +177,18 @@ static struct inode *hpfs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void hpfs_destroy_inode(struct inode *inode)
+static void hpfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode));
}
+static void hpfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, hpfs_i_callback);
+}
+
static void init_once(void *foo)
{
struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
@@ -491,7 +498,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
sbi->sb_bmp_dir = NULL;
sbi->sb_cp_table = NULL;
- init_MUTEX(&sbi->hpfs_creation_de);
+ mutex_init(&sbi->hpfs_creation_de);
uid = current_uid();
gid = current_gid();
@@ -686,17 +693,16 @@ bail0:
return -EINVAL;
}
-static int hpfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *hpfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, hpfs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, hpfs_fill_super);
}
static struct file_system_type hpfs_fs_type = {
.owner = THIS_MODULE,
.name = "hpfs",
- .get_sb = hpfs_get_sb,
+ .mount = hpfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 4e2a45e..87ed48e 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -632,11 +632,18 @@ void hppfs_evict_inode(struct inode *ino)
mntput(ino->i_sb->s_fs_info);
}
-static void hppfs_destroy_inode(struct inode *inode)
+static void hppfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kfree(HPPFS_I(inode));
}
+static void hppfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, hppfs_i_callback);
+}
+
static const struct super_operations hppfs_sbops = {
.alloc_inode = hppfs_alloc_inode,
.destroy_inode = hppfs_destroy_inode,
@@ -748,17 +755,17 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
return(err);
}
-static int hppfs_read_super(struct file_system_type *type,
+static struct dentry *hppfs_read_super(struct file_system_type *type,
int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
- return get_sb_nodev(type, flags, data, hppfs_fill_super, mnt);
+ return mount_nodev(type, flags, data, hppfs_fill_super);
}
static struct file_system_type hppfs_type = {
.owner = THIS_MODULE,
.name = "hppfs",
- .get_sb = hppfs_read_super,
+ .mount = hppfs_read_super,
.kill_sb = kill_anon_super,
.fs_flags = 0,
};
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index b14be3f..9885082 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -663,11 +663,18 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
return &p->vfs_inode;
}
+static void hugetlbfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
+}
+
static void hugetlbfs_destroy_inode(struct inode *inode)
{
hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
- kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
+ call_rcu(&inode->i_rcu, hugetlbfs_i_callback);
}
static const struct address_space_operations hugetlbfs_aops = {
@@ -896,15 +903,15 @@ void hugetlb_put_quota(struct address_space *mapping, long delta)
}
}
-static int hugetlbfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt);
+ return mount_nodev(fs_type, flags, data, hugetlbfs_fill_super);
}
static struct file_system_type hugetlbfs_fs_type = {
.name = "hugetlbfs",
- .get_sb = hugetlbfs_get_sb,
+ .mount = hugetlbfs_mount,
.kill_sb = kill_litter_super,
};
@@ -932,8 +939,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
*user = current_user();
if (user_shm_lock(size, *user)) {
- WARN_ONCE(1,
- "Using mlock ulimits for SHM_HUGETLB deprecated\n");
+ printk_once(KERN_WARNING "Using mlock ulimits for SHM_HUGETLB is deprecated\n");
} else {
*user = NULL;
return ERR_PTR(-EPERM);
diff --git a/fs/inode.c b/fs/inode.c
index ae2727a..da85e56 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -102,26 +102,29 @@ static DECLARE_RWSEM(iprune_sem);
*/
struct inodes_stat_t inodes_stat;
-static struct percpu_counter nr_inodes __cacheline_aligned_in_smp;
-static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp;
+static DEFINE_PER_CPU(unsigned int, nr_inodes);
static struct kmem_cache *inode_cachep __read_mostly;
-static inline int get_nr_inodes(void)
+static int get_nr_inodes(void)
{
- return percpu_counter_sum_positive(&nr_inodes);
+ int i;
+ int sum = 0;
+ for_each_possible_cpu(i)
+ sum += per_cpu(nr_inodes, i);
+ return sum < 0 ? 0 : sum;
}
static inline int get_nr_inodes_unused(void)
{
- return percpu_counter_sum_positive(&nr_inodes_unused);
+ return inodes_stat.nr_unused;
}
int get_nr_dirty_inodes(void)
{
+ /* not actually dirty inodes, but a wild approximation */
int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
return nr_dirty > 0 ? nr_dirty : 0;
-
}
/*
@@ -132,7 +135,6 @@ int proc_nr_inodes(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
inodes_stat.nr_inodes = get_nr_inodes();
- inodes_stat.nr_unused = get_nr_inodes_unused();
return proc_dointvec(table, write, buffer, lenp, ppos);
}
#endif
@@ -224,7 +226,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_fsnotify_mask = 0;
#endif
- percpu_counter_inc(&nr_inodes);
+ this_cpu_inc(nr_inodes);
return 0;
out:
@@ -255,6 +257,12 @@ static struct inode *alloc_inode(struct super_block *sb)
return inode;
}
+void free_inode_nonrcu(struct inode *inode)
+{
+ kmem_cache_free(inode_cachep, inode);
+}
+EXPORT_SYMBOL(free_inode_nonrcu);
+
void __destroy_inode(struct inode *inode)
{
BUG_ON(inode_has_buffers(inode));
@@ -266,10 +274,17 @@ void __destroy_inode(struct inode *inode)
if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
posix_acl_release(inode->i_default_acl);
#endif
- percpu_counter_dec(&nr_inodes);
+ this_cpu_dec(nr_inodes);
}
EXPORT_SYMBOL(__destroy_inode);
+static void i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(inode_cachep, inode);
+}
+
static void destroy_inode(struct inode *inode)
{
BUG_ON(!list_empty(&inode->i_lru));
@@ -277,7 +292,7 @@ static void destroy_inode(struct inode *inode)
if (inode->i_sb->s_op->destroy_inode)
inode->i_sb->s_op->destroy_inode(inode);
else
- kmem_cache_free(inode_cachep, (inode));
+ call_rcu(&inode->i_rcu, i_callback);
}
/*
@@ -335,7 +350,7 @@ static void inode_lru_list_add(struct inode *inode)
{
if (list_empty(&inode->i_lru)) {
list_add(&inode->i_lru, &inode_lru);
- percpu_counter_inc(&nr_inodes_unused);
+ inodes_stat.nr_unused++;
}
}
@@ -343,7 +358,7 @@ static void inode_lru_list_del(struct inode *inode)
{
if (!list_empty(&inode->i_lru)) {
list_del_init(&inode->i_lru);
- percpu_counter_dec(&nr_inodes_unused);
+ inodes_stat.nr_unused--;
}
}
@@ -430,6 +445,7 @@ void end_writeback(struct inode *inode)
BUG_ON(!(inode->i_state & I_FREEING));
BUG_ON(inode->i_state & I_CLEAR);
inode_sync_wait(inode);
+ /* don't need i_lock here, no concurrent mods to i_state */
inode->i_state = I_FREEING | I_CLEAR;
}
EXPORT_SYMBOL(end_writeback);
@@ -513,7 +529,7 @@ void evict_inodes(struct super_block *sb)
list_move(&inode->i_lru, &dispose);
list_del_init(&inode->i_wb_list);
if (!(inode->i_state & (I_DIRTY | I_SYNC)))
- percpu_counter_dec(&nr_inodes_unused);
+ inodes_stat.nr_unused--;
}
spin_unlock(&inode_lock);
@@ -554,7 +570,7 @@ int invalidate_inodes(struct super_block *sb)
list_move(&inode->i_lru, &dispose);
list_del_init(&inode->i_wb_list);
if (!(inode->i_state & (I_DIRTY | I_SYNC)))
- percpu_counter_dec(&nr_inodes_unused);
+ inodes_stat.nr_unused--;
}
spin_unlock(&inode_lock);
@@ -616,7 +632,7 @@ static void prune_icache(int nr_to_scan)
if (atomic_read(&inode->i_count) ||
(inode->i_state & ~I_REFERENCED)) {
list_del_init(&inode->i_lru);
- percpu_counter_dec(&nr_inodes_unused);
+ inodes_stat.nr_unused--;
continue;
}
@@ -650,7 +666,7 @@ static void prune_icache(int nr_to_scan)
*/
list_move(&inode->i_lru, &freeable);
list_del_init(&inode->i_wb_list);
- percpu_counter_dec(&nr_inodes_unused);
+ inodes_stat.nr_unused--;
}
if (current_is_kswapd())
__count_vm_events(KSWAPD_INODESTEAL, reap);
@@ -1648,8 +1664,6 @@ void __init inode_init(void)
SLAB_MEM_SPREAD),
init_once);
register_shrinker(&icache_shrinker);
- percpu_counter_init(&nr_inodes, 0);
- percpu_counter_init(&nr_inodes_unused, 0);
/* Hash may have been set up in inode_init_early */
if (!hashdist)
diff --git a/fs/internal.h b/fs/internal.h
index ebad3b9..9687c2e 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -63,6 +63,7 @@ extern int copy_mount_string(const void __user *, char **);
extern void free_vfsmnt(struct vfsmount *);
extern struct vfsmount *alloc_vfsmnt(const char *);
+extern unsigned int mnt_get_count(struct vfsmount *mnt);
extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
struct vfsmount *);
@@ -106,5 +107,5 @@ extern void release_open_intent(struct nameidata *);
* inode.c
*/
extern int get_nr_dirty_inodes(void);
-extern int evict_inodes(struct super_block *);
+extern void evict_inodes(struct super_block *);
extern int invalidate_inodes(struct super_block *);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index e92fdbb..d6cc164 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -6,7 +6,6 @@
#include <linux/syscalls.h>
#include <linux/mm.h>
-#include <linux/smp_lock.h>
#include <linux/capability.h>
#include <linux/file.h>
#include <linux/fs.h>
@@ -530,41 +529,6 @@ static int ioctl_fsthaw(struct file *filp)
return thaw_super(sb);
}
-static int ioctl_fstrim(struct file *filp, void __user *argp)
-{
- struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
- struct fstrim_range range;
- int ret = 0;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- /* If filesystem doesn't support trim feature, return. */
- if (sb->s_op->trim_fs == NULL)
- return -EOPNOTSUPP;
-
- /* If a blockdevice-backed filesystem isn't specified, return EINVAL. */
- if (sb->s_bdev == NULL)
- return -EINVAL;
-
- if (argp == NULL) {
- range.start = 0;
- range.len = ULLONG_MAX;
- range.minlen = 0;
- } else if (copy_from_user(&range, argp, sizeof(range)))
- return -EFAULT;
-
- ret = sb->s_op->trim_fs(sb, &range);
- if (ret < 0)
- return ret;
-
- if ((argp != NULL) &&
- (copy_to_user(argp, &range, sizeof(range))))
- return -EFAULT;
-
- return 0;
-}
-
/*
* When you add any new common ioctls to the switches above and below
* please update compat_sys_ioctl() too.
@@ -615,10 +579,6 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
error = ioctl_fsthaw(filp);
break;
- case FITRIM:
- error = ioctl_fstrim(filp, argp);
- break;
-
case FS_IOC_FIEMAP:
return ioctl_fiemap(filp, arg);
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 748cfb9..7da2a06 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -103,12 +103,7 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
}
ret = -ESRCH;
- /*
- * We want IOPRIO_WHO_PGRP/IOPRIO_WHO_USER to be "atomic",
- * so we can't use rcu_read_lock(). See re-copy of ->ioprio
- * in copy_process().
- */
- read_lock(&tasklist_lock);
+ rcu_read_lock();
switch (which) {
case IOPRIO_WHO_PROCESS:
if (!who)
@@ -153,7 +148,7 @@ free_uid:
ret = -EINVAL;
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
return ret;
}
@@ -197,7 +192,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
int ret = -ESRCH;
int tmpio;
- read_lock(&tasklist_lock);
+ rcu_read_lock();
switch (which) {
case IOPRIO_WHO_PROCESS:
if (!who)
@@ -250,6 +245,6 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
ret = -EINVAL;
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
return ret;
}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 79cf7f61..844a790 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -26,16 +26,32 @@
#define BEQUIET
-static int isofs_hashi(struct dentry *parent, struct qstr *qstr);
-static int isofs_hash(struct dentry *parent, struct qstr *qstr);
-static int isofs_dentry_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b);
-static int isofs_dentry_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b);
+static int isofs_hashi(const struct dentry *parent, const struct inode *inode,
+ struct qstr *qstr);
+static int isofs_hash(const struct dentry *parent, const struct inode *inode,
+ struct qstr *qstr);
+static int isofs_dentry_cmpi(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name);
+static int isofs_dentry_cmp(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name);
#ifdef CONFIG_JOLIET
-static int isofs_hashi_ms(struct dentry *parent, struct qstr *qstr);
-static int isofs_hash_ms(struct dentry *parent, struct qstr *qstr);
-static int isofs_dentry_cmpi_ms(struct dentry *dentry, struct qstr *a, struct qstr *b);
-static int isofs_dentry_cmp_ms(struct dentry *dentry, struct qstr *a, struct qstr *b);
+static int isofs_hashi_ms(const struct dentry *parent, const struct inode *inode,
+ struct qstr *qstr);
+static int isofs_hash_ms(const struct dentry *parent, const struct inode *inode,
+ struct qstr *qstr);
+static int isofs_dentry_cmpi_ms(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name);
+static int isofs_dentry_cmp_ms(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name);
#endif
static void isofs_put_super(struct super_block *sb)
@@ -65,11 +81,18 @@ static struct inode *isofs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void isofs_destroy_inode(struct inode *inode)
+static void isofs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
}
+static void isofs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, isofs_i_callback);
+}
+
static void init_once(void *foo)
{
struct iso_inode_info *ei = foo;
@@ -160,7 +183,7 @@ struct iso9660_options{
* Compute the hash for the isofs name corresponding to the dentry.
*/
static int
-isofs_hash_common(struct dentry *dentry, struct qstr *qstr, int ms)
+isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms)
{
const char *name;
int len;
@@ -181,7 +204,7 @@ isofs_hash_common(struct dentry *dentry, struct qstr *qstr, int ms)
* Compute the hash for the isofs name corresponding to the dentry.
*/
static int
-isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms)
+isofs_hashi_common(const struct dentry *dentry, struct qstr *qstr, int ms)
{
const char *name;
int len;
@@ -206,100 +229,94 @@ isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms)
}
/*
- * Case insensitive compare of two isofs names.
- */
-static int isofs_dentry_cmpi_common(struct dentry *dentry, struct qstr *a,
- struct qstr *b, int ms)
-{
- int alen, blen;
-
- /* A filename cannot end in '.' or we treat it like it has none */
- alen = a->len;
- blen = b->len;
- if (ms) {
- while (alen && a->name[alen-1] == '.')
- alen--;
- while (blen && b->name[blen-1] == '.')
- blen--;
- }
- if (alen == blen) {
- if (strnicmp(a->name, b->name, alen) == 0)
- return 0;
- }
- return 1;
-}
-
-/*
- * Case sensitive compare of two isofs names.
+ * Compare of two isofs names.
*/
-static int isofs_dentry_cmp_common(struct dentry *dentry, struct qstr *a,
- struct qstr *b, int ms)
+static int isofs_dentry_cmp_common(
+ unsigned int len, const char *str,
+ const struct qstr *name, int ms, int ci)
{
int alen, blen;
/* A filename cannot end in '.' or we treat it like it has none */
- alen = a->len;
- blen = b->len;
+ alen = name->len;
+ blen = len;
if (ms) {
- while (alen && a->name[alen-1] == '.')
+ while (alen && name->name[alen-1] == '.')
alen--;
- while (blen && b->name[blen-1] == '.')
+ while (blen && str[blen-1] == '.')
blen--;
}
if (alen == blen) {
- if (strncmp(a->name, b->name, alen) == 0)
- return 0;
+ if (ci) {
+ if (strnicmp(name->name, str, alen) == 0)
+ return 0;
+ } else {
+ if (strncmp(name->name, str, alen) == 0)
+ return 0;
+ }
}
return 1;
}
static int
-isofs_hash(struct dentry *dentry, struct qstr *qstr)
+isofs_hash(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
return isofs_hash_common(dentry, qstr, 0);
}
static int
-isofs_hashi(struct dentry *dentry, struct qstr *qstr)
+isofs_hashi(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
return isofs_hashi_common(dentry, qstr, 0);
}
static int
-isofs_dentry_cmp(struct dentry *dentry,struct qstr *a,struct qstr *b)
+isofs_dentry_cmp(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- return isofs_dentry_cmp_common(dentry, a, b, 0);
+ return isofs_dentry_cmp_common(len, str, name, 0, 0);
}
static int
-isofs_dentry_cmpi(struct dentry *dentry,struct qstr *a,struct qstr *b)
+isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- return isofs_dentry_cmpi_common(dentry, a, b, 0);
+ return isofs_dentry_cmp_common(len, str, name, 0, 1);
}
#ifdef CONFIG_JOLIET
static int
-isofs_hash_ms(struct dentry *dentry, struct qstr *qstr)
+isofs_hash_ms(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
return isofs_hash_common(dentry, qstr, 1);
}
static int
-isofs_hashi_ms(struct dentry *dentry, struct qstr *qstr)
+isofs_hashi_ms(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
return isofs_hashi_common(dentry, qstr, 1);
}
static int
-isofs_dentry_cmp_ms(struct dentry *dentry,struct qstr *a,struct qstr *b)
+isofs_dentry_cmp_ms(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- return isofs_dentry_cmp_common(dentry, a, b, 1);
+ return isofs_dentry_cmp_common(len, str, name, 1, 0);
}
static int
-isofs_dentry_cmpi_ms(struct dentry *dentry,struct qstr *a,struct qstr *b)
+isofs_dentry_cmpi_ms(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- return isofs_dentry_cmpi_common(dentry, a, b, 1);
+ return isofs_dentry_cmp_common(len, str, name, 1, 1);
}
#endif
@@ -932,7 +949,7 @@ root_found:
table += 2;
if (opt.check == 'r')
table++;
- s->s_root->d_op = &isofs_dentry_ops[table];
+ d_set_d_op(s->s_root, &isofs_dentry_ops[table]);
kfree(opt.iocharset);
@@ -1507,17 +1524,16 @@ struct inode *isofs_iget(struct super_block *sb,
return inode;
}
-static int isofs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *isofs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super);
}
static struct file_system_type iso9660_fs_type = {
.owner = THIS_MODULE,
.name = "iso9660",
- .get_sb = isofs_get_sb,
+ .mount = isofs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 0d23abf..679a849 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -37,7 +37,8 @@ isofs_cmp(struct dentry *dentry, const char *compare, int dlen)
qstr.name = compare;
qstr.len = dlen;
- return dentry->d_op->d_compare(dentry, &dentry->d_name, &qstr);
+ return dentry->d_op->d_compare(NULL, NULL, NULL, NULL,
+ dentry->d_name.len, dentry->d_name.name, &qstr);
}
/*
@@ -171,7 +172,7 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam
struct inode *inode;
struct page *page;
- dentry->d_op = dir->i_sb->s_root->d_op;
+ d_set_d_op(dentry, dir->i_sb->s_root->d_op);
page = alloc_page(GFP_USER);
if (!page)
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 846a3f3..5b2e4c3 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -207,7 +207,7 @@ repeat_locked:
* the committing transaction. Really, we only need to give it
* committing_transaction->t_outstanding_credits plus "enough" for
* the log control blocks.
- * Also, this test is inconsitent with the matching one in
+ * Also, this test is inconsistent with the matching one in
* journal_extend().
*/
if (__log_space_left(journal) < jbd_space_needed(journal)) {
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 538417c..9e46869 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -43,6 +43,7 @@
#include <linux/vmalloc.h>
#include <linux/backing-dev.h>
#include <linux/bitops.h>
+#include <linux/ratelimit.h>
#define CREATE_TRACE_POINTS
#include <trace/events/jbd2.h>
@@ -93,6 +94,7 @@ EXPORT_SYMBOL(jbd2_journal_file_inode);
EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
+EXPORT_SYMBOL(jbd2_inode_cache);
static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
static void __journal_abort_soft (journal_t *journal, int errno);
@@ -827,7 +829,7 @@ static journal_t * journal_init_common (void)
journal = kzalloc(sizeof(*journal), GFP_KERNEL);
if (!journal)
- goto fail;
+ return NULL;
init_waitqueue_head(&journal->j_wait_transaction_locked);
init_waitqueue_head(&journal->j_wait_logspace);
@@ -852,14 +854,12 @@ static journal_t * journal_init_common (void)
err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
if (err) {
kfree(journal);
- goto fail;
+ return NULL;
}
spin_lock_init(&journal->j_history_lock);
return journal;
-fail:
- return NULL;
}
/* jbd2_journal_init_dev and jbd2_journal_init_inode:
@@ -899,6 +899,14 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
/* journal descriptor can store up to n blocks -bzzz */
journal->j_blocksize = blocksize;
+ journal->j_dev = bdev;
+ journal->j_fs_dev = fs_dev;
+ journal->j_blk_offset = start;
+ journal->j_maxlen = len;
+ bdevname(journal->j_dev, journal->j_devname);
+ p = journal->j_devname;
+ while ((p = strchr(p, '/')))
+ *p = '!';
jbd2_stats_proc_init(journal);
n = journal->j_blocksize / sizeof(journal_block_tag_t);
journal->j_wbufsize = n;
@@ -908,14 +916,6 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
__func__);
goto out_err;
}
- journal->j_dev = bdev;
- journal->j_fs_dev = fs_dev;
- journal->j_blk_offset = start;
- journal->j_maxlen = len;
- bdevname(journal->j_dev, journal->j_devname);
- p = journal->j_devname;
- while ((p = strchr(p, '/')))
- *p = '!';
bh = __getblk(journal->j_dev, start, journal->j_blocksize);
if (!bh) {
@@ -1838,7 +1838,6 @@ size_t journal_tag_bytes(journal_t *journal)
*/
#define JBD2_MAX_SLABS 8
static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS];
-static DECLARE_MUTEX(jbd2_slab_create_sem);
static const char *jbd2_slab_names[JBD2_MAX_SLABS] = {
"jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k",
@@ -1859,6 +1858,7 @@ static void jbd2_journal_destroy_slabs(void)
static int jbd2_journal_create_slab(size_t size)
{
+ static DEFINE_MUTEX(jbd2_slab_create_mutex);
int i = order_base_2(size) - 10;
size_t slab_size;
@@ -1870,16 +1870,16 @@ static int jbd2_journal_create_slab(size_t size)
if (unlikely(i < 0))
i = 0;
- down(&jbd2_slab_create_sem);
+ mutex_lock(&jbd2_slab_create_mutex);
if (jbd2_slab[i]) {
- up(&jbd2_slab_create_sem);
+ mutex_unlock(&jbd2_slab_create_mutex);
return 0; /* Already created */
}
slab_size = 1 << (i+10);
jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size,
slab_size, 0, NULL);
- up(&jbd2_slab_create_sem);
+ mutex_unlock(&jbd2_slab_create_mutex);
if (!jbd2_slab[i]) {
printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n");
return -ENOMEM;
@@ -1982,7 +1982,6 @@ static void jbd2_journal_destroy_jbd2_journal_head_cache(void)
static struct journal_head *journal_alloc_journal_head(void)
{
struct journal_head *ret;
- static unsigned long last_warning;
#ifdef CONFIG_JBD2_DEBUG
atomic_inc(&nr_journal_heads);
@@ -1990,11 +1989,7 @@ static struct journal_head *journal_alloc_journal_head(void)
ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
if (!ret) {
jbd_debug(1, "out of memory for journal_head\n");
- if (time_after(jiffies, last_warning + 5*HZ)) {
- printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
- __func__);
- last_warning = jiffies;
- }
+ pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__);
while (!ret) {
yield();
ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
@@ -2292,17 +2287,19 @@ static void __exit jbd2_remove_jbd_stats_proc_entry(void)
#endif
-struct kmem_cache *jbd2_handle_cache;
+struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache;
static int __init journal_init_handle_cache(void)
{
- jbd2_handle_cache = kmem_cache_create("jbd2_journal_handle",
- sizeof(handle_t),
- 0, /* offset */
- SLAB_TEMPORARY, /* flags */
- NULL); /* ctor */
+ jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY);
if (jbd2_handle_cache == NULL) {
- printk(KERN_EMERG "JBD: failed to create handle cache\n");
+ printk(KERN_EMERG "JBD2: failed to create handle cache\n");
+ return -ENOMEM;
+ }
+ jbd2_inode_cache = KMEM_CACHE(jbd2_inode, 0);
+ if (jbd2_inode_cache == NULL) {
+ printk(KERN_EMERG "JBD2: failed to create inode cache\n");
+ kmem_cache_destroy(jbd2_handle_cache);
return -ENOMEM;
}
return 0;
@@ -2312,6 +2309,9 @@ static void jbd2_journal_destroy_handle_cache(void)
{
if (jbd2_handle_cache)
kmem_cache_destroy(jbd2_handle_cache);
+ if (jbd2_inode_cache)
+ kmem_cache_destroy(jbd2_inode_cache);
+
}
/*
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 2bc4d5f..1cad869 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -299,10 +299,10 @@ int jbd2_journal_skip_recovery(journal_t *journal)
#ifdef CONFIG_JBD2_DEBUG
int dropped = info.end_transaction -
be32_to_cpu(journal->j_superblock->s_sequence);
-#endif
jbd_debug(1,
"JBD: ignoring %d transaction%s from the journal.\n",
dropped, (dropped == 1) ? "" : "s");
+#endif
journal->j_transaction_sequence = ++info.end_transaction;
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 6bf0a24..faad2bd 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -251,7 +251,7 @@ repeat:
* the committing transaction. Really, we only need to give it
* committing_transaction->t_outstanding_credits plus "enough" for
* the log control blocks.
- * Also, this test is inconsitent with the matching one in
+ * Also, this test is inconsistent with the matching one in
* jbd2_journal_extend().
*/
if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
@@ -340,9 +340,7 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
jbd2_free_handle(handle);
current->journal_info = NULL;
handle = ERR_PTR(err);
- goto out;
}
-out:
return handle;
}
EXPORT_SYMBOL(jbd2__journal_start);
@@ -589,7 +587,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
transaction = handle->h_transaction;
journal = transaction->t_journal;
- jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy);
+ jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
JBUFFER_TRACE(jh, "entry");
repeat:
@@ -774,7 +772,7 @@ done:
J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
"Possible IO failure.\n");
page = jh2bh(jh)->b_page;
- offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
+ offset = offset_in_page(jh2bh(jh)->b_data);
source = kmap_atomic(page, KM_USER0);
/* Fire data frozen trigger just before we copy the data */
jbd2_buffer_frozen_trigger(jh, source + offset,
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 54a92fd..95b7967 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -259,11 +259,14 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
return rc;
}
-int jffs2_check_acl(struct inode *inode, int mask)
+int jffs2_check_acl(struct inode *inode, int mask, unsigned int flags)
{
struct posix_acl *acl;
int rc;
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 5e42de8..3119f59 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -26,7 +26,7 @@ struct jffs2_acl_header {
#ifdef CONFIG_JFFS2_FS_POSIX_ACL
-extern int jffs2_check_acl(struct inode *, int);
+extern int jffs2_check_acl(struct inode *, int, unsigned int);
extern int jffs2_acl_chmod(struct inode *);
extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
extern int jffs2_init_acl_post(struct inode *);
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index a906f53..85c6be2 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -23,7 +23,7 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *,
static inline struct jffs2_inode_cache *
first_inode_chain(int *i, struct jffs2_sb_info *c)
{
- for (; *i < INOCACHE_HASHSIZE; (*i)++) {
+ for (; *i < c->inocache_hashsize; (*i)++) {
if (c->inocache_list[*i])
return c->inocache_list[*i];
}
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index 617a1e5..de42470 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -103,7 +103,7 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
spin_unlock(&jffs2_compressor_list_lock);
*datalen = orig_slen;
*cdatalen = orig_dlen;
- compr_ret = this->compress(data_in, output_buf, datalen, cdatalen, NULL);
+ compr_ret = this->compress(data_in, output_buf, datalen, cdatalen);
spin_lock(&jffs2_compressor_list_lock);
this->usecount--;
if (!compr_ret) {
@@ -152,7 +152,7 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
spin_unlock(&jffs2_compressor_list_lock);
*datalen = orig_slen;
*cdatalen = orig_dlen;
- compr_ret = this->compress(data_in, this->compr_buf, datalen, cdatalen, NULL);
+ compr_ret = this->compress(data_in, this->compr_buf, datalen, cdatalen);
spin_lock(&jffs2_compressor_list_lock);
this->usecount--;
if (!compr_ret) {
@@ -220,7 +220,7 @@ int jffs2_decompress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
if (comprtype == this->compr) {
this->usecount++;
spin_unlock(&jffs2_compressor_list_lock);
- ret = this->decompress(cdata_in, data_out, cdatalen, datalen, NULL);
+ ret = this->decompress(cdata_in, data_out, cdatalen, datalen);
spin_lock(&jffs2_compressor_list_lock);
if (ret) {
printk(KERN_WARNING "Decompressor \"%s\" returned %d\n", this->name, ret);
diff --git a/fs/jffs2/compr.h b/fs/jffs2/compr.h
index e471a91..13bb759 100644
--- a/fs/jffs2/compr.h
+++ b/fs/jffs2/compr.h
@@ -49,9 +49,9 @@ struct jffs2_compressor {
char *name;
char compr; /* JFFS2_COMPR_XXX */
int (*compress)(unsigned char *data_in, unsigned char *cpage_out,
- uint32_t *srclen, uint32_t *destlen, void *model);
+ uint32_t *srclen, uint32_t *destlen);
int (*decompress)(unsigned char *cdata_in, unsigned char *data_out,
- uint32_t cdatalen, uint32_t datalen, void *model);
+ uint32_t cdatalen, uint32_t datalen);
int usecount;
int disabled; /* if set the compressor won't compress */
unsigned char *compr_buf; /* used by size compr. mode */
diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c
index ed25ae7c..af186ee 100644
--- a/fs/jffs2/compr_lzo.c
+++ b/fs/jffs2/compr_lzo.c
@@ -42,7 +42,7 @@ static int __init alloc_workspace(void)
}
static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out,
- uint32_t *sourcelen, uint32_t *dstlen, void *model)
+ uint32_t *sourcelen, uint32_t *dstlen)
{
size_t compress_size;
int ret;
@@ -67,7 +67,7 @@ static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out,
}
static int jffs2_lzo_decompress(unsigned char *data_in, unsigned char *cpage_out,
- uint32_t srclen, uint32_t destlen, void *model)
+ uint32_t srclen, uint32_t destlen)
{
size_t dl = destlen;
int ret;
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
index 9696ad9..16a5047 100644
--- a/fs/jffs2/compr_rtime.c
+++ b/fs/jffs2/compr_rtime.c
@@ -31,8 +31,7 @@
/* _compress returns the compressed size, -1 if bigger */
static int jffs2_rtime_compress(unsigned char *data_in,
unsigned char *cpage_out,
- uint32_t *sourcelen, uint32_t *dstlen,
- void *model)
+ uint32_t *sourcelen, uint32_t *dstlen)
{
short positions[256];
int outpos = 0;
@@ -73,8 +72,7 @@ static int jffs2_rtime_compress(unsigned char *data_in,
static int jffs2_rtime_decompress(unsigned char *data_in,
unsigned char *cpage_out,
- uint32_t srclen, uint32_t destlen,
- void *model)
+ uint32_t srclen, uint32_t destlen)
{
short positions[256];
int outpos = 0;
diff --git a/fs/jffs2/compr_rubin.c b/fs/jffs2/compr_rubin.c
index a12b4f7..9e7cec8 100644
--- a/fs/jffs2/compr_rubin.c
+++ b/fs/jffs2/compr_rubin.c
@@ -298,7 +298,7 @@ static int rubin_do_compress(int bit_divider, int *bits, unsigned char *data_in,
#if 0
/* _compress returns the compressed size, -1 if bigger */
int jffs2_rubinmips_compress(unsigned char *data_in, unsigned char *cpage_out,
- uint32_t *sourcelen, uint32_t *dstlen, void *model)
+ uint32_t *sourcelen, uint32_t *dstlen)
{
return rubin_do_compress(BIT_DIVIDER_MIPS, bits_mips, data_in,
cpage_out, sourcelen, dstlen);
@@ -306,8 +306,7 @@ int jffs2_rubinmips_compress(unsigned char *data_in, unsigned char *cpage_out,
#endif
static int jffs2_dynrubin_compress(unsigned char *data_in,
unsigned char *cpage_out,
- uint32_t *sourcelen, uint32_t *dstlen,
- void *model)
+ uint32_t *sourcelen, uint32_t *dstlen)
{
int bits[8];
unsigned char histo[256];
@@ -387,8 +386,7 @@ static void rubin_do_decompress(int bit_divider, int *bits,
static int jffs2_rubinmips_decompress(unsigned char *data_in,
unsigned char *cpage_out,
- uint32_t sourcelen, uint32_t dstlen,
- void *model)
+ uint32_t sourcelen, uint32_t dstlen)
{
rubin_do_decompress(BIT_DIVIDER_MIPS, bits_mips, data_in,
cpage_out, sourcelen, dstlen);
@@ -397,8 +395,7 @@ static int jffs2_rubinmips_decompress(unsigned char *data_in,
static int jffs2_dynrubin_decompress(unsigned char *data_in,
unsigned char *cpage_out,
- uint32_t sourcelen, uint32_t dstlen,
- void *model)
+ uint32_t sourcelen, uint32_t dstlen)
{
int bits[8];
int c;
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 97fc45d..fd05a0b 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -68,8 +68,7 @@ static void free_workspaces(void)
static int jffs2_zlib_compress(unsigned char *data_in,
unsigned char *cpage_out,
- uint32_t *sourcelen, uint32_t *dstlen,
- void *model)
+ uint32_t *sourcelen, uint32_t *dstlen)
{
int ret;
@@ -136,8 +135,7 @@ static int jffs2_zlib_compress(unsigned char *data_in,
static int jffs2_zlib_decompress(unsigned char *data_in,
unsigned char *cpage_out,
- uint32_t srclen, uint32_t destlen,
- void *model)
+ uint32_t srclen, uint32_t destlen)
{
int ret;
int wbits = MAX_WBITS;
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 79121aa..9297865 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -367,7 +367,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
}
/* We use f->target field to store the target path. */
- f->target = kmalloc(targetlen + 1, GFP_KERNEL);
+ f->target = kmemdup(target, targetlen + 1, GFP_KERNEL);
if (!f->target) {
printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1);
mutex_unlock(&f->sem);
@@ -376,7 +376,6 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
goto fail;
}
- memcpy(f->target, target, targetlen + 1);
D1(printk(KERN_DEBUG "jffs2_symlink: symlink's target '%s' cached\n", (char *)f->target));
/* No data here. Only a metadata node, which will be
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index abac961..e513f19 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -151,7 +151,7 @@ int jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
}
/* Be nice */
- yield();
+ cond_resched();
mutex_lock(&c->erase_free_sem);
spin_lock(&c->erase_completion_lock);
}
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index d9beb06..e896e67 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -474,6 +474,25 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
return inode;
}
+static int calculate_inocache_hashsize(uint32_t flash_size)
+{
+ /*
+ * Pick a inocache hash size based on the size of the medium.
+ * Count how many megabytes we're dealing with, apply a hashsize twice
+ * that size, but rounding down to the usual big powers of 2. And keep
+ * to sensible bounds.
+ */
+
+ int size_mb = flash_size / 1024 / 1024;
+ int hashsize = (size_mb * 2) & ~0x3f;
+
+ if (hashsize < INOCACHE_HASHSIZE_MIN)
+ return INOCACHE_HASHSIZE_MIN;
+ if (hashsize > INOCACHE_HASHSIZE_MAX)
+ return INOCACHE_HASHSIZE_MAX;
+
+ return hashsize;
+}
int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
{
@@ -520,7 +539,8 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
if (ret)
return ret;
- c->inocache_list = kcalloc(INOCACHE_HASHSIZE, sizeof(struct jffs2_inode_cache *), GFP_KERNEL);
+ c->inocache_hashsize = calculate_inocache_hashsize(c->flash_size);
+ c->inocache_list = kcalloc(c->inocache_hashsize, sizeof(struct jffs2_inode_cache *), GFP_KERNEL);
if (!c->inocache_list) {
ret = -ENOMEM;
goto out_wbuf;
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 846a794..31dce61133 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -219,13 +219,14 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
if (!list_empty(&c->erase_complete_list) ||
!list_empty(&c->erase_pending_list)) {
spin_unlock(&c->erase_completion_lock);
+ mutex_unlock(&c->alloc_sem);
D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() erasing pending blocks\n"));
- if (jffs2_erase_pending_blocks(c, 1)) {
- mutex_unlock(&c->alloc_sem);
+ if (jffs2_erase_pending_blocks(c, 1))
return 0;
- }
+
D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n"));
spin_lock(&c->erase_completion_lock);
+ mutex_lock(&c->alloc_sem);
}
/* First, work out which block we're garbage-collecting */
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 6784bc8..f864005 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -100,6 +100,7 @@ struct jffs2_sb_info {
wait_queue_head_t erase_wait; /* For waiting for erases to complete */
wait_queue_head_t inocache_wq;
+ int inocache_hashsize;
struct jffs2_inode_cache **inocache_list;
spinlock_t inocache_lock;
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index af02bd1..5e03233 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -420,7 +420,7 @@ struct jffs2_inode_cache *jffs2_get_ino_cache(struct jffs2_sb_info *c, uint32_t
{
struct jffs2_inode_cache *ret;
- ret = c->inocache_list[ino % INOCACHE_HASHSIZE];
+ ret = c->inocache_list[ino % c->inocache_hashsize];
while (ret && ret->ino < ino) {
ret = ret->next;
}
@@ -441,7 +441,7 @@ void jffs2_add_ino_cache (struct jffs2_sb_info *c, struct jffs2_inode_cache *new
dbg_inocache("add %p (ino #%u)\n", new, new->ino);
- prev = &c->inocache_list[new->ino % INOCACHE_HASHSIZE];
+ prev = &c->inocache_list[new->ino % c->inocache_hashsize];
while ((*prev) && (*prev)->ino < new->ino) {
prev = &(*prev)->next;
@@ -462,7 +462,7 @@ void jffs2_del_ino_cache(struct jffs2_sb_info *c, struct jffs2_inode_cache *old)
dbg_inocache("del %p (ino #%u)\n", old, old->ino);
spin_lock(&c->inocache_lock);
- prev = &c->inocache_list[old->ino % INOCACHE_HASHSIZE];
+ prev = &c->inocache_list[old->ino % c->inocache_hashsize];
while ((*prev) && (*prev)->ino < old->ino) {
prev = &(*prev)->next;
@@ -487,7 +487,7 @@ void jffs2_free_ino_caches(struct jffs2_sb_info *c)
int i;
struct jffs2_inode_cache *this, *next;
- for (i=0; i<INOCACHE_HASHSIZE; i++) {
+ for (i=0; i < c->inocache_hashsize; i++) {
this = c->inocache_list[i];
while (this) {
next = this->next;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 523a916..5a53d9b 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -199,7 +199,8 @@ struct jffs2_inode_cache {
#define RAWNODE_CLASS_XATTR_DATUM 1
#define RAWNODE_CLASS_XATTR_REF 2
-#define INOCACHE_HASHSIZE 128
+#define INOCACHE_HASHSIZE_MIN 128
+#define INOCACHE_HASHSIZE_MAX 1024
#define write_ofs(c) ((c)->nextblock->offset + (c)->sector_size - (c)->nextblock->free_size)
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 46f870d..b632ddd 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -20,7 +20,7 @@
#include "summary.h"
#include "debug.h"
-#define DEFAULT_EMPTY_SCAN_SIZE 1024
+#define DEFAULT_EMPTY_SCAN_SIZE 256
#define noisy_printk(noise, args...) do { \
if (*(noise)) { \
@@ -435,7 +435,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
unsigned char *buf, uint32_t buf_size, struct jffs2_summary *s) {
struct jffs2_unknown_node *node;
struct jffs2_unknown_node crcnode;
- uint32_t ofs, prevofs;
+ uint32_t ofs, prevofs, max_ofs;
uint32_t hdr_crc, buf_ofs, buf_len;
int err;
int noise = 0;
@@ -550,12 +550,12 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
/* We temporarily use 'ofs' as a pointer into the buffer/jeb */
ofs = 0;
-
- /* Scan only 4KiB of 0xFF before declaring it's empty */
- while(ofs < EMPTY_SCAN_SIZE(c->sector_size) && *(uint32_t *)(&buf[ofs]) == 0xFFFFFFFF)
+ max_ofs = EMPTY_SCAN_SIZE(c->sector_size);
+ /* Scan only EMPTY_SCAN_SIZE of 0xFF before declaring it's empty */
+ while(ofs < max_ofs && *(uint32_t *)(&buf[ofs]) == 0xFFFFFFFF)
ofs += 4;
- if (ofs == EMPTY_SCAN_SIZE(c->sector_size)) {
+ if (ofs == max_ofs) {
#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
if (jffs2_cleanmarker_oob(c)) {
/* scan oob, take care of cleanmarker */
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index d1ae5df..853b8e3 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -40,11 +40,18 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb)
return &f->vfs_inode;
}
-static void jffs2_destroy_inode(struct inode *inode)
+static void jffs2_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
}
+static void jffs2_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, jffs2_i_callback);
+}
+
static void jffs2_i_init_once(void *foo)
{
struct jffs2_inode_info *f = foo;
@@ -179,12 +186,11 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent)
return ret;
}
-static int jffs2_get_sb(struct file_system_type *fs_type,
+static struct dentry *jffs2_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
- return get_sb_mtd(fs_type, flags, dev_name, data, jffs2_fill_super,
- mnt);
+ return mount_mtd(fs_type, flags, dev_name, data, jffs2_fill_super);
}
static void jffs2_put_super (struct super_block *sb)
@@ -229,7 +235,7 @@ static void jffs2_kill_sb(struct super_block *sb)
static struct file_system_type jffs2_fs_type = {
.owner = THIS_MODULE,
.name = "jffs2",
- .get_sb = jffs2_get_sb,
+ .mount = jffs2_mount,
.kill_sb = jffs2_kill_sb,
};
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 1057a49..e5de942 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -114,10 +114,14 @@ out:
return rc;
}
-int jfs_check_acl(struct inode *inode, int mask)
+int jfs_check_acl(struct inode *inode, int mask, unsigned int flags)
{
- struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
+ struct posix_acl *acl;
+
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+ acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl) {
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index 54e0755..f9285c4 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,7 +20,7 @@
#ifdef CONFIG_JFS_POSIX_ACL
-int jfs_check_acl(struct inode *, int);
+int jfs_check_acl(struct inode *, int, unsigned int flags);
int jfs_init_acl(tid_t, struct inode *, struct inode *);
int jfs_acl_chmod(struct inode *inode);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 231ca4a..4414e3a 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -18,6 +18,7 @@
*/
#include <linux/fs.h>
+#include <linux/namei.h>
#include <linux/ctype.h>
#include <linux/quotaops.h>
#include <linux/exportfs.h>
@@ -1465,7 +1466,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
jfs_info("jfs_lookup: name = %s", name);
if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)
- dentry->d_op = &jfs_ci_dentry_operations;
+ d_set_d_op(dentry, &jfs_ci_dentry_operations);
if ((name[0] == '.') && (len == 1))
inum = dip->i_ino;
@@ -1494,7 +1495,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
dentry = d_splice_alias(ip, dentry);
if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2))
- dentry->d_op = &jfs_ci_dentry_operations;
+ d_set_d_op(dentry, &jfs_ci_dentry_operations);
return dentry;
}
@@ -1573,7 +1574,8 @@ const struct file_operations jfs_dir_operations = {
.llseek = generic_file_llseek,
};
-static int jfs_ci_hash(struct dentry *dir, struct qstr *this)
+static int jfs_ci_hash(const struct dentry *dir, const struct inode *inode,
+ struct qstr *this)
{
unsigned long hash;
int i;
@@ -1586,32 +1588,63 @@ static int jfs_ci_hash(struct dentry *dir, struct qstr *this)
return 0;
}
-static int jfs_ci_compare(struct dentry *dir, struct qstr *a, struct qstr *b)
+static int jfs_ci_compare(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
int i, result = 1;
- if (a->len != b->len)
+ if (len != name->len)
goto out;
- for (i=0; i < a->len; i++) {
- if (tolower(a->name[i]) != tolower(b->name[i]))
+ for (i=0; i < len; i++) {
+ if (tolower(str[i]) != tolower(name->name[i]))
goto out;
}
result = 0;
+out:
+ return result;
+}
+static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
/*
- * We want creates to preserve case. A negative dentry, a, that
- * has a different case than b may cause a new entry to be created
- * with the wrong case. Since we can't tell if a comes from a negative
- * dentry, we blindly replace it with b. This should be harmless if
- * a is not a negative dentry.
+ * This is not negative dentry. Always valid.
+ *
+ * Note, rename() to existing directory entry will have ->d_inode,
+ * and will use existing name which isn't specified name by user.
+ *
+ * We may be able to drop this positive dentry here. But dropping
+ * positive dentry isn't good idea. So it's unsupported like
+ * rename("filename", "FILENAME") for now.
*/
- memcpy((unsigned char *)a->name, b->name, a->len);
-out:
- return result;
+ if (dentry->d_inode)
+ return 1;
+
+ /*
+ * This may be nfsd (or something), anyway, we can't see the
+ * intent of this. So, since this can be for creation, drop it.
+ */
+ if (!nd)
+ return 0;
+
+ /*
+ * Drop the negative dentry, in order to make sure to use the
+ * case sensitive name which is specified by user if this is
+ * for creation.
+ */
+ if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
+ if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
+ return 0;
+ }
+ return 1;
}
const struct dentry_operations jfs_ci_dentry_operations =
{
.d_hash = jfs_ci_hash,
.d_compare = jfs_ci_compare,
+ .d_revalidate = jfs_ci_revalidate,
};
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 68eee2b..3150d76 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -115,6 +115,14 @@ static struct inode *jfs_alloc_inode(struct super_block *sb)
return &jfs_inode->vfs_inode;
}
+static void jfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct jfs_inode_info *ji = JFS_IP(inode);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(jfs_inode_cachep, ji);
+}
+
static void jfs_destroy_inode(struct inode *inode)
{
struct jfs_inode_info *ji = JFS_IP(inode);
@@ -128,7 +136,7 @@ static void jfs_destroy_inode(struct inode *inode)
ji->active_ag = -1;
}
spin_unlock_irq(&ji->ag_lock);
- kmem_cache_free(jfs_inode_cachep, ji);
+ call_rcu(&inode->i_rcu, jfs_i_callback);
}
static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -517,7 +525,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
goto out_no_root;
if (sbi->mntflag & JFS_OS2)
- sb->s_root->d_op = &jfs_ci_dentry_operations;
+ d_set_d_op(sb->s_root, &jfs_ci_dentry_operations);
/* logical blocks are represented by 40 bits in pxd_t, etc. */
sb->s_maxbytes = ((u64) sb->s_blocksize) << 40;
@@ -583,11 +591,10 @@ static int jfs_unfreeze(struct super_block *sb)
return 0;
}
-static int jfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *jfs_do_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, jfs_fill_super);
}
static int jfs_sync_fs(struct super_block *sb, int wait)
@@ -770,7 +777,7 @@ static const struct export_operations jfs_export_operations = {
static struct file_system_type jfs_fs_type = {
.owner = THIS_MODULE,
.name = "jfs",
- .get_sb = jfs_get_sb,
+ .mount = jfs_do_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/libfs.c b/fs/libfs.c
index 304a513..889311e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -16,6 +16,11 @@
#include <asm/uaccess.h>
+static inline int simple_positive(struct dentry *dentry)
+{
+ return dentry->d_inode && !d_unhashed(dentry);
+}
+
int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat)
{
@@ -37,7 +42,7 @@ int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
* Retaining negative dentries for an in-memory filesystem just wastes
* memory and lookup time: arrange for them to be deleted immediately.
*/
-static int simple_delete_dentry(struct dentry *dentry)
+static int simple_delete_dentry(const struct dentry *dentry)
{
return 1;
}
@@ -54,7 +59,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na
if (dentry->d_name.len > NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
- dentry->d_op = &simple_dentry_operations;
+ d_set_d_op(dentry, &simple_dentry_operations);
d_add(dentry, NULL);
return NULL;
}
@@ -76,7 +81,8 @@ int dcache_dir_close(struct inode *inode, struct file *file)
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
{
- mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
+ struct dentry *dentry = file->f_path.dentry;
+ mutex_lock(&dentry->d_inode->i_mutex);
switch (origin) {
case 1:
offset += file->f_pos;
@@ -84,7 +90,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
if (offset >= 0)
break;
default:
- mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
+ mutex_unlock(&dentry->d_inode->i_mutex);
return -EINVAL;
}
if (offset != file->f_pos) {
@@ -94,21 +100,24 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
struct dentry *cursor = file->private_data;
loff_t n = file->f_pos - 2;
- spin_lock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
+ /* d_lock not required for cursor */
list_del(&cursor->d_u.d_child);
- p = file->f_path.dentry->d_subdirs.next;
- while (n && p != &file->f_path.dentry->d_subdirs) {
+ p = dentry->d_subdirs.next;
+ while (n && p != &dentry->d_subdirs) {
struct dentry *next;
next = list_entry(p, struct dentry, d_u.d_child);
- if (!d_unhashed(next) && next->d_inode)
+ spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
+ if (simple_positive(next))
n--;
+ spin_unlock(&next->d_lock);
p = p->next;
}
list_add_tail(&cursor->d_u.d_child, p);
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
}
}
- mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
+ mutex_unlock(&dentry->d_inode->i_mutex);
return offset;
}
@@ -148,29 +157,35 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
i++;
/* fallthrough */
default:
- spin_lock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
if (filp->f_pos == 2)
list_move(q, &dentry->d_subdirs);
for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
struct dentry *next;
next = list_entry(p, struct dentry, d_u.d_child);
- if (d_unhashed(next) || !next->d_inode)
+ spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
+ if (!simple_positive(next)) {
+ spin_unlock(&next->d_lock);
continue;
+ }
- spin_unlock(&dcache_lock);
+ spin_unlock(&next->d_lock);
+ spin_unlock(&dentry->d_lock);
if (filldir(dirent, next->d_name.name,
next->d_name.len, filp->f_pos,
next->d_inode->i_ino,
dt_type(next->d_inode)) < 0)
return 0;
- spin_lock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
+ spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
/* next is still alive */
list_move(q, p);
+ spin_unlock(&next->d_lock);
p = q;
filp->f_pos++;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
}
return 0;
}
@@ -201,9 +216,8 @@ static const struct super_operations simple_super_operations = {
* Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
* will never be mountable)
*/
-int get_sb_pseudo(struct file_system_type *fs_type, char *name,
- const struct super_operations *ops, unsigned long magic,
- struct vfsmount *mnt)
+struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
+ const struct super_operations *ops, unsigned long magic)
{
struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
struct dentry *dentry;
@@ -211,7 +225,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
struct qstr d_name = {.name = name, .len = strlen(name)};
if (IS_ERR(s))
- return PTR_ERR(s);
+ return ERR_CAST(s);
s->s_flags = MS_NOUSER;
s->s_maxbytes = MAX_LFS_FILESIZE;
@@ -241,12 +255,11 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
d_instantiate(dentry, root);
s->s_root = dentry;
s->s_flags |= MS_ACTIVE;
- simple_set_mnt(mnt, s);
- return 0;
+ return dget(s->s_root);
Enomem:
deactivate_locked_super(s);
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
}
int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
@@ -261,23 +274,23 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den
return 0;
}
-static inline int simple_positive(struct dentry *dentry)
-{
- return dentry->d_inode && !d_unhashed(dentry);
-}
-
int simple_empty(struct dentry *dentry)
{
struct dentry *child;
int ret = 0;
- spin_lock(&dcache_lock);
- list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
- if (simple_positive(child))
+ spin_lock(&dentry->d_lock);
+ list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) {
+ spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
+ if (simple_positive(child)) {
+ spin_unlock(&child->d_lock);
goto out;
+ }
+ spin_unlock(&child->d_lock);
+ }
ret = 1;
out:
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
return ret;
}
@@ -951,7 +964,7 @@ EXPORT_SYMBOL(dcache_dir_lseek);
EXPORT_SYMBOL(dcache_dir_open);
EXPORT_SYMBOL(dcache_readdir);
EXPORT_SYMBOL(generic_read_dir);
-EXPORT_SYMBOL(get_sb_pseudo);
+EXPORT_SYMBOL(mount_pseudo);
EXPORT_SYMBOL(simple_write_begin);
EXPORT_SYMBOL(simple_write_end);
EXPORT_SYMBOL(simple_dir_inode_operations);
diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile
index 97f6073..ca58d64 100644
--- a/fs/lockd/Makefile
+++ b/fs/lockd/Makefile
@@ -4,7 +4,7 @@
obj-$(CONFIG_LOCKD) += lockd.o
-lockd-objs-y := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \
- svcproc.o svcsubs.o mon.o xdr.o grace.o
-lockd-objs-$(CONFIG_LOCKD_V4) += xdr4.o svc4proc.o
+lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \
+ svcshare.o svcproc.o svcsubs.o mon.o xdr.o grace.o
+lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o
lockd-objs := $(lockd-objs-y)
diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
new file mode 100644
index 0000000..f848b52
--- /dev/null
+++ b/fs/lockd/clnt4xdr.c
@@ -0,0 +1,605 @@
+/*
+ * linux/fs/lockd/clnt4xdr.c
+ *
+ * XDR functions to encode/decode NLM version 4 RPC arguments and results.
+ *
+ * NLM client-side only.
+ *
+ * Copyright (C) 2010, Oracle. All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/lockd/lockd.h>
+
+#define NLMDBG_FACILITY NLMDBG_XDR
+
+#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
+# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
+#endif
+
+#if (NLMCLNT_OHSIZE > NLM_MAXSTRLEN)
+# error "NLM host name cannot be larger than NLM's maximum string length!"
+#endif
+
+/*
+ * Declare the space requirements for NLM arguments and replies as
+ * number of 32bit-words
+ */
+#define NLM4_void_sz (0)
+#define NLM4_cookie_sz (1+(NLM_MAXCOOKIELEN>>2))
+#define NLM4_caller_sz (1+(NLMCLNT_OHSIZE>>2))
+#define NLM4_owner_sz (1+(NLMCLNT_OHSIZE>>2))
+#define NLM4_fhandle_sz (1+(NFS3_FHSIZE>>2))
+#define NLM4_lock_sz (5+NLM4_caller_sz+NLM4_owner_sz+NLM4_fhandle_sz)
+#define NLM4_holder_sz (6+NLM4_owner_sz)
+
+#define NLM4_testargs_sz (NLM4_cookie_sz+1+NLM4_lock_sz)
+#define NLM4_lockargs_sz (NLM4_cookie_sz+4+NLM4_lock_sz)
+#define NLM4_cancargs_sz (NLM4_cookie_sz+2+NLM4_lock_sz)
+#define NLM4_unlockargs_sz (NLM4_cookie_sz+NLM4_lock_sz)
+
+#define NLM4_testres_sz (NLM4_cookie_sz+1+NLM4_holder_sz)
+#define NLM4_res_sz (NLM4_cookie_sz+1)
+#define NLM4_norep_sz (0)
+
+
+static s64 loff_t_to_s64(loff_t offset)
+{
+ s64 res;
+
+ if (offset >= NLM4_OFFSET_MAX)
+ res = NLM4_OFFSET_MAX;
+ else if (offset <= -NLM4_OFFSET_MAX)
+ res = -NLM4_OFFSET_MAX;
+ else
+ res = offset;
+ return res;
+}
+
+static void nlm4_compute_offsets(const struct nlm_lock *lock,
+ u64 *l_offset, u64 *l_len)
+{
+ const struct file_lock *fl = &lock->fl;
+
+ BUG_ON(fl->fl_start > NLM4_OFFSET_MAX);
+ BUG_ON(fl->fl_end > NLM4_OFFSET_MAX &&
+ fl->fl_end != OFFSET_MAX);
+
+ *l_offset = loff_t_to_s64(fl->fl_start);
+ if (fl->fl_end == OFFSET_MAX)
+ *l_len = 0;
+ else
+ *l_len = loff_t_to_s64(fl->fl_end - fl->fl_start + 1);
+}
+
+/*
+ * Handle decode buffer overflows out-of-line.
+ */
+static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+{
+ dprintk("lockd: %s prematurely hit the end of our receive buffer. "
+ "Remaining buffer length is %tu words.\n",
+ func, xdr->end - xdr->p);
+}
+
+
+/*
+ * Encode/decode NLMv4 basic data types
+ *
+ * Basic NLMv4 data types are defined in Appendix II, section 6.1.4
+ * of RFC 1813: "NFS Version 3 Protocol Specification" and in Chapter
+ * 10 of X/Open's "Protocols for Interworking: XNFS, Version 3W".
+ *
+ * Not all basic data types have their own encoding and decoding
+ * functions. For run-time efficiency, some data types are encoded
+ * or decoded inline.
+ */
+
+static void encode_bool(struct xdr_stream *xdr, const int value)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4);
+ *p = value ? xdr_one : xdr_zero;
+}
+
+static void encode_int32(struct xdr_stream *xdr, const s32 value)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4);
+ *p = cpu_to_be32(value);
+}
+
+/*
+ * typedef opaque netobj<MAXNETOBJ_SZ>
+ */
+static void encode_netobj(struct xdr_stream *xdr,
+ const u8 *data, const unsigned int length)
+{
+ __be32 *p;
+
+ BUG_ON(length > XDR_MAX_NETOBJ);
+ p = xdr_reserve_space(xdr, 4 + length);
+ xdr_encode_opaque(p, data, length);
+}
+
+static int decode_netobj(struct xdr_stream *xdr,
+ struct xdr_netobj *obj)
+{
+ u32 length;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ length = be32_to_cpup(p++);
+ if (unlikely(length > XDR_MAX_NETOBJ))
+ goto out_size;
+ obj->len = length;
+ obj->data = (u8 *)p;
+ return 0;
+out_size:
+ dprintk("NFS: returned netobj was too long: %u\n", length);
+ return -EIO;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * netobj cookie;
+ */
+static void encode_cookie(struct xdr_stream *xdr,
+ const struct nlm_cookie *cookie)
+{
+ BUG_ON(cookie->len > NLM_MAXCOOKIELEN);
+ encode_netobj(xdr, (u8 *)&cookie->data, cookie->len);
+}
+
+static int decode_cookie(struct xdr_stream *xdr,
+ struct nlm_cookie *cookie)
+{
+ u32 length;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ length = be32_to_cpup(p++);
+ /* apparently HPUX can return empty cookies */
+ if (length == 0)
+ goto out_hpux;
+ if (length > NLM_MAXCOOKIELEN)
+ goto out_size;
+ p = xdr_inline_decode(xdr, length);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ cookie->len = length;
+ memcpy(cookie->data, p, length);
+ return 0;
+out_hpux:
+ cookie->len = 4;
+ memset(cookie->data, 0, 4);
+ return 0;
+out_size:
+ dprintk("NFS: returned cookie was too long: %u\n", length);
+ return -EIO;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * netobj fh;
+ */
+static void encode_fh(struct xdr_stream *xdr, const struct nfs_fh *fh)
+{
+ BUG_ON(fh->size > NFS3_FHSIZE);
+ encode_netobj(xdr, (u8 *)&fh->data, fh->size);
+}
+
+/*
+ * enum nlm4_stats {
+ * NLM4_GRANTED = 0,
+ * NLM4_DENIED = 1,
+ * NLM4_DENIED_NOLOCKS = 2,
+ * NLM4_BLOCKED = 3,
+ * NLM4_DENIED_GRACE_PERIOD = 4,
+ * NLM4_DEADLCK = 5,
+ * NLM4_ROFS = 6,
+ * NLM4_STALE_FH = 7,
+ * NLM4_FBIG = 8,
+ * NLM4_FAILED = 9
+ * };
+ *
+ * struct nlm4_stat {
+ * nlm4_stats stat;
+ * };
+ *
+ * NB: we don't swap bytes for the NLM status values. The upper
+ * layers deal directly with the status value in network byte
+ * order.
+ */
+static void encode_nlm4_stat(struct xdr_stream *xdr,
+ const __be32 stat)
+{
+ __be32 *p;
+
+ BUG_ON(be32_to_cpu(stat) > NLM_FAILED);
+ p = xdr_reserve_space(xdr, 4);
+ *p = stat;
+}
+
+static int decode_nlm4_stat(struct xdr_stream *xdr, __be32 *stat)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ if (unlikely(*p > nlm4_failed))
+ goto out_bad_xdr;
+ *stat = *p;
+ return 0;
+out_bad_xdr:
+ dprintk("%s: server returned invalid nlm4_stats value: %u\n",
+ __func__, be32_to_cpup(p));
+ return -EIO;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * struct nlm4_holder {
+ * bool exclusive;
+ * int32 svid;
+ * netobj oh;
+ * uint64 l_offset;
+ * uint64 l_len;
+ * };
+ */
+static void encode_nlm4_holder(struct xdr_stream *xdr,
+ const struct nlm_res *result)
+{
+ const struct nlm_lock *lock = &result->lock;
+ u64 l_offset, l_len;
+ __be32 *p;
+
+ encode_bool(xdr, lock->fl.fl_type == F_RDLCK);
+ encode_int32(xdr, lock->svid);
+ encode_netobj(xdr, lock->oh.data, lock->oh.len);
+
+ p = xdr_reserve_space(xdr, 4 + 4);
+ nlm4_compute_offsets(lock, &l_offset, &l_len);
+ p = xdr_encode_hyper(p, l_offset);
+ xdr_encode_hyper(p, l_len);
+}
+
+static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result)
+{
+ struct nlm_lock *lock = &result->lock;
+ struct file_lock *fl = &lock->fl;
+ u64 l_offset, l_len;
+ u32 exclusive;
+ int error;
+ __be32 *p;
+ s32 end;
+
+ memset(lock, 0, sizeof(*lock));
+ locks_init_lock(fl);
+
+ p = xdr_inline_decode(xdr, 4 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ exclusive = be32_to_cpup(p++);
+ lock->svid = be32_to_cpup(p);
+ fl->fl_pid = (pid_t)lock->svid;
+
+ error = decode_netobj(xdr, &lock->oh);
+ if (unlikely(error))
+ goto out;
+
+ p = xdr_inline_decode(xdr, 8 + 8);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+
+ fl->fl_flags = FL_POSIX;
+ fl->fl_type = exclusive != 0 ? F_WRLCK : F_RDLCK;
+ p = xdr_decode_hyper(p, &l_offset);
+ xdr_decode_hyper(p, &l_len);
+ end = l_offset + l_len - 1;
+
+ fl->fl_start = (loff_t)l_offset;
+ if (l_len == 0 || end < 0)
+ fl->fl_end = OFFSET_MAX;
+ else
+ fl->fl_end = (loff_t)end;
+ error = 0;
+out:
+ return error;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * string caller_name<LM_MAXSTRLEN>;
+ */
+static void encode_caller_name(struct xdr_stream *xdr, const char *name)
+{
+ /* NB: client-side does not set lock->len */
+ u32 length = strlen(name);
+ __be32 *p;
+
+ BUG_ON(length > NLM_MAXSTRLEN);
+ p = xdr_reserve_space(xdr, 4 + length);
+ xdr_encode_opaque(p, name, length);
+}
+
+/*
+ * struct nlm4_lock {
+ * string caller_name<LM_MAXSTRLEN>;
+ * netobj fh;
+ * netobj oh;
+ * int32 svid;
+ * uint64 l_offset;
+ * uint64 l_len;
+ * };
+ */
+static void encode_nlm4_lock(struct xdr_stream *xdr,
+ const struct nlm_lock *lock)
+{
+ u64 l_offset, l_len;
+ __be32 *p;
+
+ encode_caller_name(xdr, lock->caller);
+ encode_fh(xdr, &lock->fh);
+ encode_netobj(xdr, lock->oh.data, lock->oh.len);
+
+ p = xdr_reserve_space(xdr, 4 + 8 + 8);
+ *p++ = cpu_to_be32(lock->svid);
+
+ nlm4_compute_offsets(lock, &l_offset, &l_len);
+ p = xdr_encode_hyper(p, l_offset);
+ xdr_encode_hyper(p, l_len);
+}
+
+
+/*
+ * NLMv4 XDR encode functions
+ *
+ * NLMv4 argument types are defined in Appendix II of RFC 1813:
+ * "NFS Version 3 Protocol Specification" and Chapter 10 of X/Open's
+ * "Protocols for Interworking: XNFS, Version 3W".
+ */
+
+/*
+ * struct nlm4_testargs {
+ * netobj cookie;
+ * bool exclusive;
+ * struct nlm4_lock alock;
+ * };
+ */
+static void nlm4_xdr_enc_testargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_args *args)
+{
+ const struct nlm_lock *lock = &args->lock;
+
+ encode_cookie(xdr, &args->cookie);
+ encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+ encode_nlm4_lock(xdr, lock);
+}
+
+/*
+ * struct nlm4_lockargs {
+ * netobj cookie;
+ * bool block;
+ * bool exclusive;
+ * struct nlm4_lock alock;
+ * bool reclaim;
+ * int state;
+ * };
+ */
+static void nlm4_xdr_enc_lockargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_args *args)
+{
+ const struct nlm_lock *lock = &args->lock;
+
+ encode_cookie(xdr, &args->cookie);
+ encode_bool(xdr, args->block);
+ encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+ encode_nlm4_lock(xdr, lock);
+ encode_bool(xdr, args->reclaim);
+ encode_int32(xdr, args->state);
+}
+
+/*
+ * struct nlm4_cancargs {
+ * netobj cookie;
+ * bool block;
+ * bool exclusive;
+ * struct nlm4_lock alock;
+ * };
+ */
+static void nlm4_xdr_enc_cancargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_args *args)
+{
+ const struct nlm_lock *lock = &args->lock;
+
+ encode_cookie(xdr, &args->cookie);
+ encode_bool(xdr, args->block);
+ encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+ encode_nlm4_lock(xdr, lock);
+}
+
+/*
+ * struct nlm4_unlockargs {
+ * netobj cookie;
+ * struct nlm4_lock alock;
+ * };
+ */
+static void nlm4_xdr_enc_unlockargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_args *args)
+{
+ const struct nlm_lock *lock = &args->lock;
+
+ encode_cookie(xdr, &args->cookie);
+ encode_nlm4_lock(xdr, lock);
+}
+
+/*
+ * struct nlm4_res {
+ * netobj cookie;
+ * nlm4_stat stat;
+ * };
+ */
+static void nlm4_xdr_enc_res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_res *result)
+{
+ encode_cookie(xdr, &result->cookie);
+ encode_nlm4_stat(xdr, result->status);
+}
+
+/*
+ * union nlm4_testrply switch (nlm4_stats stat) {
+ * case NLM4_DENIED:
+ * struct nlm4_holder holder;
+ * default:
+ * void;
+ * };
+ *
+ * struct nlm4_testres {
+ * netobj cookie;
+ * nlm4_testrply test_stat;
+ * };
+ */
+static void nlm4_xdr_enc_testres(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_res *result)
+{
+ encode_cookie(xdr, &result->cookie);
+ encode_nlm4_stat(xdr, result->status);
+ if (result->status == nlm_lck_denied)
+ encode_nlm4_holder(xdr, result);
+}
+
+
+/*
+ * NLMv4 XDR decode functions
+ *
+ * NLMv4 argument types are defined in Appendix II of RFC 1813:
+ * "NFS Version 3 Protocol Specification" and Chapter 10 of X/Open's
+ * "Protocols for Interworking: XNFS, Version 3W".
+ */
+
+/*
+ * union nlm4_testrply switch (nlm4_stats stat) {
+ * case NLM4_DENIED:
+ * struct nlm4_holder holder;
+ * default:
+ * void;
+ * };
+ *
+ * struct nlm4_testres {
+ * netobj cookie;
+ * nlm4_testrply test_stat;
+ * };
+ */
+static int decode_nlm4_testrply(struct xdr_stream *xdr,
+ struct nlm_res *result)
+{
+ int error;
+
+ error = decode_nlm4_stat(xdr, &result->status);
+ if (unlikely(error))
+ goto out;
+ if (result->status == nlm_lck_denied)
+ error = decode_nlm4_holder(xdr, result);
+out:
+ return error;
+}
+
+static int nlm4_xdr_dec_testres(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nlm_res *result)
+{
+ int error;
+
+ error = decode_cookie(xdr, &result->cookie);
+ if (unlikely(error))
+ goto out;
+ error = decode_nlm4_testrply(xdr, result);
+out:
+ return error;
+}
+
+/*
+ * struct nlm4_res {
+ * netobj cookie;
+ * nlm4_stat stat;
+ * };
+ */
+static int nlm4_xdr_dec_res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nlm_res *result)
+{
+ int error;
+
+ error = decode_cookie(xdr, &result->cookie);
+ if (unlikely(error))
+ goto out;
+ error = decode_nlm4_stat(xdr, &result->status);
+out:
+ return error;
+}
+
+
+/*
+ * For NLM, a void procedure really returns nothing
+ */
+#define nlm4_xdr_dec_norep NULL
+
+#define PROC(proc, argtype, restype) \
+[NLMPROC_##proc] = { \
+ .p_proc = NLMPROC_##proc, \
+ .p_encode = (kxdreproc_t)nlm4_xdr_enc_##argtype, \
+ .p_decode = (kxdrdproc_t)nlm4_xdr_dec_##restype, \
+ .p_arglen = NLM4_##argtype##_sz, \
+ .p_replen = NLM4_##restype##_sz, \
+ .p_statidx = NLMPROC_##proc, \
+ .p_name = #proc, \
+ }
+
+static struct rpc_procinfo nlm4_procedures[] = {
+ PROC(TEST, testargs, testres),
+ PROC(LOCK, lockargs, res),
+ PROC(CANCEL, cancargs, res),
+ PROC(UNLOCK, unlockargs, res),
+ PROC(GRANTED, testargs, res),
+ PROC(TEST_MSG, testargs, norep),
+ PROC(LOCK_MSG, lockargs, norep),
+ PROC(CANCEL_MSG, cancargs, norep),
+ PROC(UNLOCK_MSG, unlockargs, norep),
+ PROC(GRANTED_MSG, testargs, norep),
+ PROC(TEST_RES, testres, norep),
+ PROC(LOCK_RES, res, norep),
+ PROC(CANCEL_RES, res, norep),
+ PROC(UNLOCK_RES, res, norep),
+ PROC(GRANTED_RES, res, norep),
+};
+
+struct rpc_version nlm_version4 = {
+ .number = 4,
+ .nrprocs = ARRAY_SIZE(nlm4_procedures),
+ .procs = nlm4_procedures,
+};
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index d5bb868..8d4ea83 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -14,7 +14,6 @@
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svc.h>
#include <linux/lockd/lockd.h>
-#include <linux/smp_lock.h>
#include <linux/kthread.h>
#define NLMDBG_FACILITY NLMDBG_CLIENT
@@ -80,7 +79,7 @@ EXPORT_SYMBOL_GPL(nlmclnt_init);
*/
void nlmclnt_done(struct nlm_host *host)
{
- nlm_release_host(host);
+ nlmclnt_release_host(host);
lockd_down();
}
EXPORT_SYMBOL_GPL(nlmclnt_done);
@@ -274,7 +273,7 @@ restart:
spin_unlock(&nlm_blocked_lock);
/* Release host handle after use */
- nlm_release_host(host);
+ nlmclnt_release_host(host);
lockd_down();
return 0;
}
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 47ea1e1..adb45ec 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -7,7 +7,6 @@
*/
#include <linux/module.h>
-#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/errno.h>
@@ -59,7 +58,7 @@ static void nlm_put_lockowner(struct nlm_lockowner *lockowner)
return;
list_del(&lockowner->list);
spin_unlock(&lockowner->host->h_lock);
- nlm_release_host(lockowner->host);
+ nlmclnt_release_host(lockowner->host);
kfree(lockowner);
}
@@ -208,22 +207,22 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
printk("nlm_alloc_call: failed, waiting for memory\n");
schedule_timeout_interruptible(5*HZ);
}
- nlm_release_host(host);
+ nlmclnt_release_host(host);
return NULL;
}
-void nlm_release_call(struct nlm_rqst *call)
+void nlmclnt_release_call(struct nlm_rqst *call)
{
if (!atomic_dec_and_test(&call->a_count))
return;
- nlm_release_host(call->a_host);
+ nlmclnt_release_host(call->a_host);
nlmclnt_release_lockargs(call);
kfree(call);
}
static void nlmclnt_rpc_release(void *data)
{
- nlm_release_call(data);
+ nlmclnt_release_call(data);
}
static int nlm_wait_on_grace(wait_queue_head_t *queue)
@@ -437,7 +436,7 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
status = nlm_stat_to_errno(req->a_res.status);
}
out:
- nlm_release_call(req);
+ nlmclnt_release_call(req);
return status;
}
@@ -594,7 +593,7 @@ again:
out_unblock:
nlmclnt_finish_block(block);
out:
- nlm_release_call(req);
+ nlmclnt_release_call(req);
return status;
out_unlock:
/* Fatal error: ensure that we remove the lock altogether */
@@ -695,7 +694,7 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
/* What to do now? I'm out of my depth... */
status = -ENOLCK;
out:
- nlm_release_call(req);
+ nlmclnt_release_call(req);
return status;
}
@@ -756,7 +755,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
NLMPROC_CANCEL, &nlmclnt_cancel_ops);
if (status == 0 && req->a_res.status == nlm_lck_denied)
status = -ENOLCK;
- nlm_release_call(req);
+ nlmclnt_release_call(req);
return status;
}
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
new file mode 100644
index 0000000..180ac34
--- /dev/null
+++ b/fs/lockd/clntxdr.c
@@ -0,0 +1,627 @@
+/*
+ * linux/fs/lockd/clntxdr.c
+ *
+ * XDR functions to encode/decode NLM version 3 RPC arguments and results.
+ * NLM version 3 is backwards compatible with NLM versions 1 and 2.
+ *
+ * NLM client-side only.
+ *
+ * Copyright (C) 2010, Oracle. All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/lockd/lockd.h>
+
+#define NLMDBG_FACILITY NLMDBG_XDR
+
+#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
+# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
+#endif
+
+/*
+ * Declare the space requirements for NLM arguments and replies as
+ * number of 32bit-words
+ */
+#define NLM_cookie_sz (1+(NLM_MAXCOOKIELEN>>2))
+#define NLM_caller_sz (1+(NLMCLNT_OHSIZE>>2))
+#define NLM_owner_sz (1+(NLMCLNT_OHSIZE>>2))
+#define NLM_fhandle_sz (1+(NFS2_FHSIZE>>2))
+#define NLM_lock_sz (3+NLM_caller_sz+NLM_owner_sz+NLM_fhandle_sz)
+#define NLM_holder_sz (4+NLM_owner_sz)
+
+#define NLM_testargs_sz (NLM_cookie_sz+1+NLM_lock_sz)
+#define NLM_lockargs_sz (NLM_cookie_sz+4+NLM_lock_sz)
+#define NLM_cancargs_sz (NLM_cookie_sz+2+NLM_lock_sz)
+#define NLM_unlockargs_sz (NLM_cookie_sz+NLM_lock_sz)
+
+#define NLM_testres_sz (NLM_cookie_sz+1+NLM_holder_sz)
+#define NLM_res_sz (NLM_cookie_sz+1)
+#define NLM_norep_sz (0)
+
+
+static s32 loff_t_to_s32(loff_t offset)
+{
+ s32 res;
+
+ if (offset >= NLM_OFFSET_MAX)
+ res = NLM_OFFSET_MAX;
+ else if (offset <= -NLM_OFFSET_MAX)
+ res = -NLM_OFFSET_MAX;
+ else
+ res = offset;
+ return res;
+}
+
+static void nlm_compute_offsets(const struct nlm_lock *lock,
+ u32 *l_offset, u32 *l_len)
+{
+ const struct file_lock *fl = &lock->fl;
+
+ BUG_ON(fl->fl_start > NLM_OFFSET_MAX);
+ BUG_ON(fl->fl_end > NLM_OFFSET_MAX &&
+ fl->fl_end != OFFSET_MAX);
+
+ *l_offset = loff_t_to_s32(fl->fl_start);
+ if (fl->fl_end == OFFSET_MAX)
+ *l_len = 0;
+ else
+ *l_len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
+}
+
+/*
+ * Handle decode buffer overflows out-of-line.
+ */
+static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+{
+ dprintk("lockd: %s prematurely hit the end of our receive buffer. "
+ "Remaining buffer length is %tu words.\n",
+ func, xdr->end - xdr->p);
+}
+
+
+/*
+ * Encode/decode NLMv3 basic data types
+ *
+ * Basic NLMv3 data types are not defined in an IETF standards
+ * document. X/Open has a description of these data types that
+ * is useful. See Chapter 10 of "Protocols for Interworking:
+ * XNFS, Version 3W".
+ *
+ * Not all basic data types have their own encoding and decoding
+ * functions. For run-time efficiency, some data types are encoded
+ * or decoded inline.
+ */
+
+static void encode_bool(struct xdr_stream *xdr, const int value)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4);
+ *p = value ? xdr_one : xdr_zero;
+}
+
+static void encode_int32(struct xdr_stream *xdr, const s32 value)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4);
+ *p = cpu_to_be32(value);
+}
+
+/*
+ * typedef opaque netobj<MAXNETOBJ_SZ>
+ */
+static void encode_netobj(struct xdr_stream *xdr,
+ const u8 *data, const unsigned int length)
+{
+ __be32 *p;
+
+ BUG_ON(length > XDR_MAX_NETOBJ);
+ p = xdr_reserve_space(xdr, 4 + length);
+ xdr_encode_opaque(p, data, length);
+}
+
+static int decode_netobj(struct xdr_stream *xdr,
+ struct xdr_netobj *obj)
+{
+ u32 length;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ length = be32_to_cpup(p++);
+ if (unlikely(length > XDR_MAX_NETOBJ))
+ goto out_size;
+ obj->len = length;
+ obj->data = (u8 *)p;
+ return 0;
+out_size:
+ dprintk("NFS: returned netobj was too long: %u\n", length);
+ return -EIO;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * netobj cookie;
+ */
+static void encode_cookie(struct xdr_stream *xdr,
+ const struct nlm_cookie *cookie)
+{
+ BUG_ON(cookie->len > NLM_MAXCOOKIELEN);
+ encode_netobj(xdr, (u8 *)&cookie->data, cookie->len);
+}
+
+static int decode_cookie(struct xdr_stream *xdr,
+ struct nlm_cookie *cookie)
+{
+ u32 length;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ length = be32_to_cpup(p++);
+ /* apparently HPUX can return empty cookies */
+ if (length == 0)
+ goto out_hpux;
+ if (length > NLM_MAXCOOKIELEN)
+ goto out_size;
+ p = xdr_inline_decode(xdr, length);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ cookie->len = length;
+ memcpy(cookie->data, p, length);
+ return 0;
+out_hpux:
+ cookie->len = 4;
+ memset(cookie->data, 0, 4);
+ return 0;
+out_size:
+ dprintk("NFS: returned cookie was too long: %u\n", length);
+ return -EIO;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * netobj fh;
+ */
+static void encode_fh(struct xdr_stream *xdr, const struct nfs_fh *fh)
+{
+ BUG_ON(fh->size != NFS2_FHSIZE);
+ encode_netobj(xdr, (u8 *)&fh->data, NFS2_FHSIZE);
+}
+
+/*
+ * enum nlm_stats {
+ * LCK_GRANTED = 0,
+ * LCK_DENIED = 1,
+ * LCK_DENIED_NOLOCKS = 2,
+ * LCK_BLOCKED = 3,
+ * LCK_DENIED_GRACE_PERIOD = 4
+ * };
+ *
+ *
+ * struct nlm_stat {
+ * nlm_stats stat;
+ * };
+ *
+ * NB: we don't swap bytes for the NLM status values. The upper
+ * layers deal directly with the status value in network byte
+ * order.
+ */
+
+static void encode_nlm_stat(struct xdr_stream *xdr,
+ const __be32 stat)
+{
+ __be32 *p;
+
+ BUG_ON(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD);
+ p = xdr_reserve_space(xdr, 4);
+ *p = stat;
+}
+
+static int decode_nlm_stat(struct xdr_stream *xdr,
+ __be32 *stat)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ if (unlikely(*p > nlm_lck_denied_grace_period))
+ goto out_enum;
+ *stat = *p;
+ return 0;
+out_enum:
+ dprintk("%s: server returned invalid nlm_stats value: %u\n",
+ __func__, be32_to_cpup(p));
+ return -EIO;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * struct nlm_holder {
+ * bool exclusive;
+ * int uppid;
+ * netobj oh;
+ * unsigned l_offset;
+ * unsigned l_len;
+ * };
+ */
+static void encode_nlm_holder(struct xdr_stream *xdr,
+ const struct nlm_res *result)
+{
+ const struct nlm_lock *lock = &result->lock;
+ u32 l_offset, l_len;
+ __be32 *p;
+
+ encode_bool(xdr, lock->fl.fl_type == F_RDLCK);
+ encode_int32(xdr, lock->svid);
+ encode_netobj(xdr, lock->oh.data, lock->oh.len);
+
+ p = xdr_reserve_space(xdr, 4 + 4);
+ nlm_compute_offsets(lock, &l_offset, &l_len);
+ *p++ = cpu_to_be32(l_offset);
+ *p = cpu_to_be32(l_len);
+}
+
+static int decode_nlm_holder(struct xdr_stream *xdr, struct nlm_res *result)
+{
+ struct nlm_lock *lock = &result->lock;
+ struct file_lock *fl = &lock->fl;
+ u32 exclusive, l_offset, l_len;
+ int error;
+ __be32 *p;
+ s32 end;
+
+ memset(lock, 0, sizeof(*lock));
+ locks_init_lock(fl);
+
+ p = xdr_inline_decode(xdr, 4 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ exclusive = be32_to_cpup(p++);
+ lock->svid = be32_to_cpup(p);
+ fl->fl_pid = (pid_t)lock->svid;
+
+ error = decode_netobj(xdr, &lock->oh);
+ if (unlikely(error))
+ goto out;
+
+ p = xdr_inline_decode(xdr, 4 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+
+ fl->fl_flags = FL_POSIX;
+ fl->fl_type = exclusive != 0 ? F_WRLCK : F_RDLCK;
+ l_offset = be32_to_cpup(p++);
+ l_len = be32_to_cpup(p);
+ end = l_offset + l_len - 1;
+
+ fl->fl_start = (loff_t)l_offset;
+ if (l_len == 0 || end < 0)
+ fl->fl_end = OFFSET_MAX;
+ else
+ fl->fl_end = (loff_t)end;
+ error = 0;
+out:
+ return error;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * string caller_name<LM_MAXSTRLEN>;
+ */
+static void encode_caller_name(struct xdr_stream *xdr, const char *name)
+{
+ /* NB: client-side does not set lock->len */
+ u32 length = strlen(name);
+ __be32 *p;
+
+ BUG_ON(length > NLM_MAXSTRLEN);
+ p = xdr_reserve_space(xdr, 4 + length);
+ xdr_encode_opaque(p, name, length);
+}
+
+/*
+ * struct nlm_lock {
+ * string caller_name<LM_MAXSTRLEN>;
+ * netobj fh;
+ * netobj oh;
+ * int uppid;
+ * unsigned l_offset;
+ * unsigned l_len;
+ * };
+ */
+static void encode_nlm_lock(struct xdr_stream *xdr,
+ const struct nlm_lock *lock)
+{
+ u32 l_offset, l_len;
+ __be32 *p;
+
+ encode_caller_name(xdr, lock->caller);
+ encode_fh(xdr, &lock->fh);
+ encode_netobj(xdr, lock->oh.data, lock->oh.len);
+
+ p = xdr_reserve_space(xdr, 4 + 4 + 4);
+ *p++ = cpu_to_be32(lock->svid);
+
+ nlm_compute_offsets(lock, &l_offset, &l_len);
+ *p++ = cpu_to_be32(l_offset);
+ *p = cpu_to_be32(l_len);
+}
+
+
+/*
+ * NLMv3 XDR encode functions
+ *
+ * NLMv3 argument types are defined in Chapter 10 of The Open Group's
+ * "Protocols for Interworking: XNFS, Version 3W".
+ */
+
+/*
+ * struct nlm_testargs {
+ * netobj cookie;
+ * bool exclusive;
+ * struct nlm_lock alock;
+ * };
+ */
+static void nlm_xdr_enc_testargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_args *args)
+{
+ const struct nlm_lock *lock = &args->lock;
+
+ encode_cookie(xdr, &args->cookie);
+ encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+ encode_nlm_lock(xdr, lock);
+}
+
+/*
+ * struct nlm_lockargs {
+ * netobj cookie;
+ * bool block;
+ * bool exclusive;
+ * struct nlm_lock alock;
+ * bool reclaim;
+ * int state;
+ * };
+ */
+static void nlm_xdr_enc_lockargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_args *args)
+{
+ const struct nlm_lock *lock = &args->lock;
+
+ encode_cookie(xdr, &args->cookie);
+ encode_bool(xdr, args->block);
+ encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+ encode_nlm_lock(xdr, lock);
+ encode_bool(xdr, args->reclaim);
+ encode_int32(xdr, args->state);
+}
+
+/*
+ * struct nlm_cancargs {
+ * netobj cookie;
+ * bool block;
+ * bool exclusive;
+ * struct nlm_lock alock;
+ * };
+ */
+static void nlm_xdr_enc_cancargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_args *args)
+{
+ const struct nlm_lock *lock = &args->lock;
+
+ encode_cookie(xdr, &args->cookie);
+ encode_bool(xdr, args->block);
+ encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+ encode_nlm_lock(xdr, lock);
+}
+
+/*
+ * struct nlm_unlockargs {
+ * netobj cookie;
+ * struct nlm_lock alock;
+ * };
+ */
+static void nlm_xdr_enc_unlockargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_args *args)
+{
+ const struct nlm_lock *lock = &args->lock;
+
+ encode_cookie(xdr, &args->cookie);
+ encode_nlm_lock(xdr, lock);
+}
+
+/*
+ * struct nlm_res {
+ * netobj cookie;
+ * nlm_stat stat;
+ * };
+ */
+static void nlm_xdr_enc_res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_res *result)
+{
+ encode_cookie(xdr, &result->cookie);
+ encode_nlm_stat(xdr, result->status);
+}
+
+/*
+ * union nlm_testrply switch (nlm_stats stat) {
+ * case LCK_DENIED:
+ * struct nlm_holder holder;
+ * default:
+ * void;
+ * };
+ *
+ * struct nlm_testres {
+ * netobj cookie;
+ * nlm_testrply test_stat;
+ * };
+ */
+static void encode_nlm_testrply(struct xdr_stream *xdr,
+ const struct nlm_res *result)
+{
+ if (result->status == nlm_lck_denied)
+ encode_nlm_holder(xdr, result);
+}
+
+static void nlm_xdr_enc_testres(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_res *result)
+{
+ encode_cookie(xdr, &result->cookie);
+ encode_nlm_stat(xdr, result->status);
+ encode_nlm_testrply(xdr, result);
+}
+
+
+/*
+ * NLMv3 XDR decode functions
+ *
+ * NLMv3 result types are defined in Chapter 10 of The Open Group's
+ * "Protocols for Interworking: XNFS, Version 3W".
+ */
+
+/*
+ * union nlm_testrply switch (nlm_stats stat) {
+ * case LCK_DENIED:
+ * struct nlm_holder holder;
+ * default:
+ * void;
+ * };
+ *
+ * struct nlm_testres {
+ * netobj cookie;
+ * nlm_testrply test_stat;
+ * };
+ */
+static int decode_nlm_testrply(struct xdr_stream *xdr,
+ struct nlm_res *result)
+{
+ int error;
+
+ error = decode_nlm_stat(xdr, &result->status);
+ if (unlikely(error))
+ goto out;
+ if (result->status == nlm_lck_denied)
+ error = decode_nlm_holder(xdr, result);
+out:
+ return error;
+}
+
+static int nlm_xdr_dec_testres(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nlm_res *result)
+{
+ int error;
+
+ error = decode_cookie(xdr, &result->cookie);
+ if (unlikely(error))
+ goto out;
+ error = decode_nlm_testrply(xdr, result);
+out:
+ return error;
+}
+
+/*
+ * struct nlm_res {
+ * netobj cookie;
+ * nlm_stat stat;
+ * };
+ */
+static int nlm_xdr_dec_res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nlm_res *result)
+{
+ int error;
+
+ error = decode_cookie(xdr, &result->cookie);
+ if (unlikely(error))
+ goto out;
+ error = decode_nlm_stat(xdr, &result->status);
+out:
+ return error;
+}
+
+
+/*
+ * For NLM, a void procedure really returns nothing
+ */
+#define nlm_xdr_dec_norep NULL
+
+#define PROC(proc, argtype, restype) \
+[NLMPROC_##proc] = { \
+ .p_proc = NLMPROC_##proc, \
+ .p_encode = (kxdreproc_t)nlm_xdr_enc_##argtype, \
+ .p_decode = (kxdrdproc_t)nlm_xdr_dec_##restype, \
+ .p_arglen = NLM_##argtype##_sz, \
+ .p_replen = NLM_##restype##_sz, \
+ .p_statidx = NLMPROC_##proc, \
+ .p_name = #proc, \
+ }
+
+static struct rpc_procinfo nlm_procedures[] = {
+ PROC(TEST, testargs, testres),
+ PROC(LOCK, lockargs, res),
+ PROC(CANCEL, cancargs, res),
+ PROC(UNLOCK, unlockargs, res),
+ PROC(GRANTED, testargs, res),
+ PROC(TEST_MSG, testargs, norep),
+ PROC(LOCK_MSG, lockargs, norep),
+ PROC(CANCEL_MSG, cancargs, norep),
+ PROC(UNLOCK_MSG, unlockargs, norep),
+ PROC(GRANTED_MSG, testargs, norep),
+ PROC(TEST_RES, testres, norep),
+ PROC(LOCK_RES, res, norep),
+ PROC(CANCEL_RES, res, norep),
+ PROC(UNLOCK_RES, res, norep),
+ PROC(GRANTED_RES, res, norep),
+};
+
+static struct rpc_version nlm_version1 = {
+ .number = 1,
+ .nrprocs = ARRAY_SIZE(nlm_procedures),
+ .procs = nlm_procedures,
+};
+
+static struct rpc_version nlm_version3 = {
+ .number = 3,
+ .nrprocs = ARRAY_SIZE(nlm_procedures),
+ .procs = nlm_procedures,
+};
+
+static struct rpc_version *nlm_versions[] = {
+ [1] = &nlm_version1,
+ [3] = &nlm_version3,
+#ifdef CONFIG_LOCKD_V4
+ [4] = &nlm_version4,
+#endif
+};
+
+static struct rpc_stat nlm_rpc_stats;
+
+struct rpc_program nlm_program = {
+ .name = "lockd",
+ .number = NLM_PROGRAM,
+ .nrvers = ARRAY_SIZE(nlm_versions),
+ .version = nlm_versions,
+ .stats = &nlm_rpc_stats,
+};
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 25e21e4..5f1bcb2 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -25,9 +25,22 @@
#define NLM_HOST_EXPIRE (300 * HZ)
#define NLM_HOST_COLLECT (120 * HZ)
-static struct hlist_head nlm_hosts[NLM_HOST_NRHASH];
+static struct hlist_head nlm_server_hosts[NLM_HOST_NRHASH];
+static struct hlist_head nlm_client_hosts[NLM_HOST_NRHASH];
+
+#define for_each_host(host, pos, chain, table) \
+ for ((chain) = (table); \
+ (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \
+ hlist_for_each_entry((host), (pos), (chain), h_hash)
+
+#define for_each_host_safe(host, pos, next, chain, table) \
+ for ((chain) = (table); \
+ (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \
+ hlist_for_each_entry_safe((host), (pos), (next), \
+ (chain), h_hash)
+
static unsigned long next_gc;
-static int nrhosts;
+static unsigned long nrhosts;
static DEFINE_MUTEX(nlm_host_mutex);
static void nlm_gc_hosts(void);
@@ -40,8 +53,6 @@ struct nlm_lookup_host_info {
const u32 version; /* NLM version to search for */
const char *hostname; /* remote's hostname */
const size_t hostname_len; /* it's length */
- const struct sockaddr *src_sap; /* our address (optional) */
- const size_t src_len; /* it's length */
const int noresvport; /* use non-priv port */
};
@@ -88,126 +99,83 @@ static unsigned int nlm_hash_address(const struct sockaddr *sap)
}
/*
- * Common host lookup routine for server & client
+ * Allocate and initialize an nlm_host. Common to both client and server.
*/
-static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
+static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
+ struct nsm_handle *nsm)
{
- struct hlist_head *chain;
- struct hlist_node *pos;
- struct nlm_host *host;
- struct nsm_handle *nsm = NULL;
-
- mutex_lock(&nlm_host_mutex);
+ struct nlm_host *host = NULL;
+ unsigned long now = jiffies;
- if (time_after_eq(jiffies, next_gc))
- nlm_gc_hosts();
-
- /* We may keep several nlm_host objects for a peer, because each
- * nlm_host is identified by
- * (address, protocol, version, server/client)
- * We could probably simplify this a little by putting all those
- * different NLM rpc_clients into one single nlm_host object.
- * This would allow us to have one nlm_host per address.
- */
- chain = &nlm_hosts[nlm_hash_address(ni->sap)];
- hlist_for_each_entry(host, pos, chain, h_hash) {
- if (!rpc_cmp_addr(nlm_addr(host), ni->sap))
- continue;
-
- /* See if we have an NSM handle for this client */
- if (!nsm)
- nsm = host->h_nsmhandle;
-
- if (host->h_proto != ni->protocol)
- continue;
- if (host->h_version != ni->version)
- continue;
- if (host->h_server != ni->server)
- continue;
- if (ni->server &&
- !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap))
- continue;
-
- /* Move to head of hash chain. */
- hlist_del(&host->h_hash);
- hlist_add_head(&host->h_hash, chain);
-
- nlm_get_host(host);
- dprintk("lockd: nlm_lookup_host found host %s (%s)\n",
- host->h_name, host->h_addrbuf);
- goto out;
- }
-
- /*
- * The host wasn't in our hash table. If we don't
- * have an NSM handle for it yet, create one.
- */
- if (nsm)
+ if (nsm != NULL)
atomic_inc(&nsm->sm_count);
else {
host = NULL;
nsm = nsm_get_handle(ni->sap, ni->salen,
ni->hostname, ni->hostname_len);
- if (!nsm) {
- dprintk("lockd: nlm_lookup_host failed; "
- "no nsm handle\n");
+ if (unlikely(nsm == NULL)) {
+ dprintk("lockd: %s failed; no nsm handle\n",
+ __func__);
goto out;
}
}
- host = kzalloc(sizeof(*host), GFP_KERNEL);
- if (!host) {
+ host = kmalloc(sizeof(*host), GFP_KERNEL);
+ if (unlikely(host == NULL)) {
+ dprintk("lockd: %s failed; no memory\n", __func__);
nsm_release(nsm);
- dprintk("lockd: nlm_lookup_host failed; no memory\n");
goto out;
}
- host->h_name = nsm->sm_name;
- host->h_addrbuf = nsm->sm_addrbuf;
+
memcpy(nlm_addr(host), ni->sap, ni->salen);
- host->h_addrlen = ni->salen;
+ host->h_addrlen = ni->salen;
rpc_set_port(nlm_addr(host), 0);
- memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len);
+ host->h_srcaddrlen = 0;
+
+ host->h_rpcclnt = NULL;
+ host->h_name = nsm->sm_name;
host->h_version = ni->version;
host->h_proto = ni->protocol;
- host->h_rpcclnt = NULL;
- mutex_init(&host->h_mutex);
- host->h_nextrebind = jiffies + NLM_HOST_REBIND;
- host->h_expires = jiffies + NLM_HOST_EXPIRE;
- atomic_set(&host->h_count, 1);
+ host->h_reclaiming = 0;
+ host->h_server = ni->server;
+ host->h_noresvport = ni->noresvport;
+ host->h_inuse = 0;
init_waitqueue_head(&host->h_gracewait);
init_rwsem(&host->h_rwsem);
- host->h_state = 0; /* pseudo NSM state */
- host->h_nsmstate = 0; /* real NSM state */
- host->h_nsmhandle = nsm;
- host->h_server = ni->server;
- host->h_noresvport = ni->noresvport;
- hlist_add_head(&host->h_hash, chain);
+ host->h_state = 0;
+ host->h_nsmstate = 0;
+ host->h_pidcount = 0;
+ atomic_set(&host->h_count, 1);
+ mutex_init(&host->h_mutex);
+ host->h_nextrebind = now + NLM_HOST_REBIND;
+ host->h_expires = now + NLM_HOST_EXPIRE;
INIT_LIST_HEAD(&host->h_lockowners);
spin_lock_init(&host->h_lock);
INIT_LIST_HEAD(&host->h_granted);
INIT_LIST_HEAD(&host->h_reclaim);
-
- nrhosts++;
-
- dprintk("lockd: nlm_lookup_host created host %s\n",
- host->h_name);
+ host->h_nsmhandle = nsm;
+ host->h_addrbuf = nsm->sm_addrbuf;
out:
- mutex_unlock(&nlm_host_mutex);
return host;
}
/*
- * Destroy a host
+ * Destroy an nlm_host and free associated resources
+ *
+ * Caller must hold nlm_host_mutex.
*/
-static void
-nlm_destroy_host(struct nlm_host *host)
+static void nlm_destroy_host_locked(struct nlm_host *host)
{
struct rpc_clnt *clnt;
+ dprintk("lockd: destroy host %s\n", host->h_name);
+
BUG_ON(!list_empty(&host->h_lockowners));
BUG_ON(atomic_read(&host->h_count));
+ hlist_del_init(&host->h_hash);
+
nsm_unmonitor(host);
nsm_release(host->h_nsmhandle);
@@ -215,6 +183,8 @@ nlm_destroy_host(struct nlm_host *host)
if (clnt != NULL)
rpc_shutdown_client(clnt);
kfree(host);
+
+ nrhosts--;
}
/**
@@ -238,9 +208,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
const char *hostname,
int noresvport)
{
- const struct sockaddr source = {
- .sa_family = AF_UNSPEC,
- };
struct nlm_lookup_host_info ni = {
.server = 0,
.sap = sap,
@@ -249,16 +216,78 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
.version = version,
.hostname = hostname,
.hostname_len = strlen(hostname),
- .src_sap = &source,
- .src_len = sizeof(source),
.noresvport = noresvport,
};
+ struct hlist_head *chain;
+ struct hlist_node *pos;
+ struct nlm_host *host;
+ struct nsm_handle *nsm = NULL;
dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__,
(hostname ? hostname : "<none>"), version,
(protocol == IPPROTO_UDP ? "udp" : "tcp"));
- return nlm_lookup_host(&ni);
+ mutex_lock(&nlm_host_mutex);
+
+ chain = &nlm_client_hosts[nlm_hash_address(sap)];
+ hlist_for_each_entry(host, pos, chain, h_hash) {
+ if (!rpc_cmp_addr(nlm_addr(host), sap))
+ continue;
+
+ /* Same address. Share an NSM handle if we already have one */
+ if (nsm == NULL)
+ nsm = host->h_nsmhandle;
+
+ if (host->h_proto != protocol)
+ continue;
+ if (host->h_version != version)
+ continue;
+
+ nlm_get_host(host);
+ dprintk("lockd: %s found host %s (%s)\n", __func__,
+ host->h_name, host->h_addrbuf);
+ goto out;
+ }
+
+ host = nlm_alloc_host(&ni, nsm);
+ if (unlikely(host == NULL))
+ goto out;
+
+ hlist_add_head(&host->h_hash, chain);
+ nrhosts++;
+
+ dprintk("lockd: %s created host %s (%s)\n", __func__,
+ host->h_name, host->h_addrbuf);
+
+out:
+ mutex_unlock(&nlm_host_mutex);
+ return host;
+}
+
+/**
+ * nlmclnt_release_host - release client nlm_host
+ * @host: nlm_host to release
+ *
+ */
+void nlmclnt_release_host(struct nlm_host *host)
+{
+ if (host == NULL)
+ return;
+
+ dprintk("lockd: release client host %s\n", host->h_name);
+
+ BUG_ON(atomic_read(&host->h_count) < 0);
+ BUG_ON(host->h_server);
+
+ if (atomic_dec_and_test(&host->h_count)) {
+ BUG_ON(!list_empty(&host->h_lockowners));
+ BUG_ON(!list_empty(&host->h_granted));
+ BUG_ON(!list_empty(&host->h_reclaim));
+
+ mutex_lock(&nlm_host_mutex);
+ nlm_destroy_host_locked(host);
+ mutex_unlock(&nlm_host_mutex);
+ }
}
/**
@@ -283,12 +312,18 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
const char *hostname,
const size_t hostname_len)
{
+ struct hlist_head *chain;
+ struct hlist_node *pos;
+ struct nlm_host *host = NULL;
+ struct nsm_handle *nsm = NULL;
struct sockaddr_in sin = {
.sin_family = AF_INET,
};
struct sockaddr_in6 sin6 = {
.sin6_family = AF_INET6,
};
+ struct sockaddr *src_sap;
+ size_t src_len = rqstp->rq_addrlen;
struct nlm_lookup_host_info ni = {
.server = 1,
.sap = svc_addr(rqstp),
@@ -297,27 +332,91 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
.version = rqstp->rq_vers,
.hostname = hostname,
.hostname_len = hostname_len,
- .src_len = rqstp->rq_addrlen,
};
dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__,
(int)hostname_len, hostname, rqstp->rq_vers,
(rqstp->rq_prot == IPPROTO_UDP ? "udp" : "tcp"));
+ mutex_lock(&nlm_host_mutex);
+
switch (ni.sap->sa_family) {
case AF_INET:
sin.sin_addr.s_addr = rqstp->rq_daddr.addr.s_addr;
- ni.src_sap = (struct sockaddr *)&sin;
+ src_sap = (struct sockaddr *)&sin;
break;
case AF_INET6:
ipv6_addr_copy(&sin6.sin6_addr, &rqstp->rq_daddr.addr6);
- ni.src_sap = (struct sockaddr *)&sin6;
+ src_sap = (struct sockaddr *)&sin6;
break;
default:
- return NULL;
+ dprintk("lockd: %s failed; unrecognized address family\n",
+ __func__);
+ goto out;
}
- return nlm_lookup_host(&ni);
+ if (time_after_eq(jiffies, next_gc))
+ nlm_gc_hosts();
+
+ chain = &nlm_server_hosts[nlm_hash_address(ni.sap)];
+ hlist_for_each_entry(host, pos, chain, h_hash) {
+ if (!rpc_cmp_addr(nlm_addr(host), ni.sap))
+ continue;
+
+ /* Same address. Share an NSM handle if we already have one */
+ if (nsm == NULL)
+ nsm = host->h_nsmhandle;
+
+ if (host->h_proto != ni.protocol)
+ continue;
+ if (host->h_version != ni.version)
+ continue;
+ if (!rpc_cmp_addr(nlm_srcaddr(host), src_sap))
+ continue;
+
+ /* Move to head of hash chain. */
+ hlist_del(&host->h_hash);
+ hlist_add_head(&host->h_hash, chain);
+
+ nlm_get_host(host);
+ dprintk("lockd: %s found host %s (%s)\n",
+ __func__, host->h_name, host->h_addrbuf);
+ goto out;
+ }
+
+ host = nlm_alloc_host(&ni, nsm);
+ if (unlikely(host == NULL))
+ goto out;
+
+ memcpy(nlm_srcaddr(host), src_sap, src_len);
+ host->h_srcaddrlen = src_len;
+ hlist_add_head(&host->h_hash, chain);
+ nrhosts++;
+
+ dprintk("lockd: %s created host %s (%s)\n",
+ __func__, host->h_name, host->h_addrbuf);
+
+out:
+ mutex_unlock(&nlm_host_mutex);
+ return host;
+}
+
+/**
+ * nlmsvc_release_host - release server nlm_host
+ * @host: nlm_host to release
+ *
+ * Host is destroyed later in nlm_gc_host().
+ */
+void nlmsvc_release_host(struct nlm_host *host)
+{
+ if (host == NULL)
+ return;
+
+ dprintk("lockd: release server host %s\n", host->h_name);
+
+ BUG_ON(atomic_read(&host->h_count) < 0);
+ BUG_ON(!host->h_server);
+ atomic_dec(&host->h_count);
}
/*
@@ -357,7 +456,6 @@ nlm_bind_host(struct nlm_host *host)
.protocol = host->h_proto,
.address = nlm_addr(host),
.addrsize = host->h_addrlen,
- .saddress = nlm_srcaddr(host),
.timeout = &timeparms,
.servername = host->h_name,
.program = &nlm_program,
@@ -376,6 +474,8 @@ nlm_bind_host(struct nlm_host *host)
args.flags |= RPC_CLNT_CREATE_HARDRTRY;
if (host->h_noresvport)
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
+ if (host->h_srcaddrlen)
+ args.saddress = nlm_srcaddr(host);
clnt = rpc_create(&args);
if (!IS_ERR(clnt))
@@ -416,20 +516,28 @@ struct nlm_host * nlm_get_host(struct nlm_host *host)
return host;
}
-/*
- * Release NLM host after use
- */
-void nlm_release_host(struct nlm_host *host)
+static struct nlm_host *next_host_state(struct hlist_head *cache,
+ struct nsm_handle *nsm,
+ const struct nlm_reboot *info)
{
- if (host != NULL) {
- dprintk("lockd: release host %s\n", host->h_name);
- BUG_ON(atomic_read(&host->h_count) < 0);
- if (atomic_dec_and_test(&host->h_count)) {
- BUG_ON(!list_empty(&host->h_lockowners));
- BUG_ON(!list_empty(&host->h_granted));
- BUG_ON(!list_empty(&host->h_reclaim));
+ struct nlm_host *host = NULL;
+ struct hlist_head *chain;
+ struct hlist_node *pos;
+
+ mutex_lock(&nlm_host_mutex);
+ for_each_host(host, pos, chain, cache) {
+ if (host->h_nsmhandle == nsm
+ && host->h_nsmstate != info->state) {
+ host->h_nsmstate = info->state;
+ host->h_state++;
+
+ nlm_get_host(host);
+ goto out;
}
}
+out:
+ mutex_unlock(&nlm_host_mutex);
+ return host;
}
/**
@@ -441,8 +549,6 @@ void nlm_release_host(struct nlm_host *host)
*/
void nlm_host_rebooted(const struct nlm_reboot *info)
{
- struct hlist_head *chain;
- struct hlist_node *pos;
struct nsm_handle *nsm;
struct nlm_host *host;
@@ -455,32 +561,15 @@ void nlm_host_rebooted(const struct nlm_reboot *info)
* lock for this.
* To avoid processing a host several times, we match the nsmstate.
*/
-again: mutex_lock(&nlm_host_mutex);
- for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
- hlist_for_each_entry(host, pos, chain, h_hash) {
- if (host->h_nsmhandle == nsm
- && host->h_nsmstate != info->state) {
- host->h_nsmstate = info->state;
- host->h_state++;
-
- nlm_get_host(host);
- mutex_unlock(&nlm_host_mutex);
-
- if (host->h_server) {
- /* We're server for this guy, just ditch
- * all the locks he held. */
- nlmsvc_free_host_resources(host);
- } else {
- /* He's the server, initiate lock recovery. */
- nlmclnt_recovery(host);
- }
-
- nlm_release_host(host);
- goto again;
- }
- }
+ while ((host = next_host_state(nlm_server_hosts, nsm, info)) != NULL) {
+ nlmsvc_free_host_resources(host);
+ nlmsvc_release_host(host);
}
- mutex_unlock(&nlm_host_mutex);
+ while ((host = next_host_state(nlm_client_hosts, nsm, info)) != NULL) {
+ nlmclnt_recovery(host);
+ nlmclnt_release_host(host);
+ }
+
nsm_release(nsm);
}
@@ -500,13 +589,11 @@ nlm_shutdown_hosts(void)
/* First, make all hosts eligible for gc */
dprintk("lockd: nuking all hosts...\n");
- for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
- hlist_for_each_entry(host, pos, chain, h_hash) {
- host->h_expires = jiffies - 1;
- if (host->h_rpcclnt) {
- rpc_shutdown_client(host->h_rpcclnt);
- host->h_rpcclnt = NULL;
- }
+ for_each_host(host, pos, chain, nlm_server_hosts) {
+ host->h_expires = jiffies - 1;
+ if (host->h_rpcclnt) {
+ rpc_shutdown_client(host->h_rpcclnt);
+ host->h_rpcclnt = NULL;
}
}
@@ -515,15 +602,13 @@ nlm_shutdown_hosts(void)
mutex_unlock(&nlm_host_mutex);
/* complain if any hosts are left */
- if (nrhosts) {
+ if (nrhosts != 0) {
printk(KERN_WARNING "lockd: couldn't shutdown host module!\n");
- dprintk("lockd: %d hosts left:\n", nrhosts);
- for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
- hlist_for_each_entry(host, pos, chain, h_hash) {
- dprintk(" %s (cnt %d use %d exp %ld)\n",
- host->h_name, atomic_read(&host->h_count),
- host->h_inuse, host->h_expires);
- }
+ dprintk("lockd: %lu hosts left:\n", nrhosts);
+ for_each_host(host, pos, chain, nlm_server_hosts) {
+ dprintk(" %s (cnt %d use %d exp %ld)\n",
+ host->h_name, atomic_read(&host->h_count),
+ host->h_inuse, host->h_expires);
}
}
}
@@ -541,29 +626,22 @@ nlm_gc_hosts(void)
struct nlm_host *host;
dprintk("lockd: host garbage collection\n");
- for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
- hlist_for_each_entry(host, pos, chain, h_hash)
- host->h_inuse = 0;
- }
+ for_each_host(host, pos, chain, nlm_server_hosts)
+ host->h_inuse = 0;
/* Mark all hosts that hold locks, blocks or shares */
nlmsvc_mark_resources();
- for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
- hlist_for_each_entry_safe(host, pos, next, chain, h_hash) {
- if (atomic_read(&host->h_count) || host->h_inuse
- || time_before(jiffies, host->h_expires)) {
- dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n",
- host->h_name, atomic_read(&host->h_count),
- host->h_inuse, host->h_expires);
- continue;
- }
- dprintk("lockd: delete host %s\n", host->h_name);
- hlist_del_init(&host->h_hash);
-
- nlm_destroy_host(host);
- nrhosts--;
+ for_each_host_safe(host, pos, next, chain, nlm_server_hosts) {
+ if (atomic_read(&host->h_count) || host->h_inuse
+ || time_before(jiffies, host->h_expires)) {
+ dprintk("nlm_gc_hosts skipping %s "
+ "(cnt %d use %d exp %ld)\n",
+ host->h_name, atomic_read(&host->h_count),
+ host->h_inuse, host->h_expires);
+ continue;
}
+ nlm_destroy_host_locked(host);
}
next_gc = jiffies + NLM_HOST_COLLECT;
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index e0c9189..23d7451 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -401,26 +401,22 @@ void nsm_release(struct nsm_handle *nsm)
* Status Monitor wire protocol.
*/
-static int encode_nsm_string(struct xdr_stream *xdr, const char *string)
+static void encode_nsm_string(struct xdr_stream *xdr, const char *string)
{
const u32 len = strlen(string);
__be32 *p;
- if (unlikely(len > SM_MAXSTRLEN))
- return -EIO;
- p = xdr_reserve_space(xdr, sizeof(u32) + len);
- if (unlikely(p == NULL))
- return -EIO;
+ BUG_ON(len > SM_MAXSTRLEN);
+ p = xdr_reserve_space(xdr, 4 + len);
xdr_encode_opaque(p, string, len);
- return 0;
}
/*
* "mon_name" specifies the host to be monitored.
*/
-static int encode_mon_name(struct xdr_stream *xdr, const struct nsm_args *argp)
+static void encode_mon_name(struct xdr_stream *xdr, const struct nsm_args *argp)
{
- return encode_nsm_string(xdr, argp->mon_name);
+ encode_nsm_string(xdr, argp->mon_name);
}
/*
@@ -429,35 +425,25 @@ static int encode_mon_name(struct xdr_stream *xdr, const struct nsm_args *argp)
* (via the NLMPROC_SM_NOTIFY call) that the state of host "mon_name"
* has changed.
*/
-static int encode_my_id(struct xdr_stream *xdr, const struct nsm_args *argp)
+static void encode_my_id(struct xdr_stream *xdr, const struct nsm_args *argp)
{
- int status;
__be32 *p;
- status = encode_nsm_string(xdr, utsname()->nodename);
- if (unlikely(status != 0))
- return status;
- p = xdr_reserve_space(xdr, 3 * sizeof(u32));
- if (unlikely(p == NULL))
- return -EIO;
- *p++ = htonl(argp->prog);
- *p++ = htonl(argp->vers);
- *p++ = htonl(argp->proc);
- return 0;
+ encode_nsm_string(xdr, utsname()->nodename);
+ p = xdr_reserve_space(xdr, 4 + 4 + 4);
+ *p++ = cpu_to_be32(argp->prog);
+ *p++ = cpu_to_be32(argp->vers);
+ *p = cpu_to_be32(argp->proc);
}
/*
* The "mon_id" argument specifies the non-private arguments
* of an NSMPROC_MON or NSMPROC_UNMON call.
*/
-static int encode_mon_id(struct xdr_stream *xdr, const struct nsm_args *argp)
+static void encode_mon_id(struct xdr_stream *xdr, const struct nsm_args *argp)
{
- int status;
-
- status = encode_mon_name(xdr, argp);
- if (unlikely(status != 0))
- return status;
- return encode_my_id(xdr, argp);
+ encode_mon_name(xdr, argp);
+ encode_my_id(xdr, argp);
}
/*
@@ -465,68 +451,56 @@ static int encode_mon_id(struct xdr_stream *xdr, const struct nsm_args *argp)
* by the NSMPROC_MON call. This information will be supplied in the
* NLMPROC_SM_NOTIFY call.
*/
-static int encode_priv(struct xdr_stream *xdr, const struct nsm_args *argp)
+static void encode_priv(struct xdr_stream *xdr, const struct nsm_args *argp)
{
__be32 *p;
p = xdr_reserve_space(xdr, SM_PRIV_SIZE);
- if (unlikely(p == NULL))
- return -EIO;
xdr_encode_opaque_fixed(p, argp->priv->data, SM_PRIV_SIZE);
- return 0;
}
-static int xdr_enc_mon(struct rpc_rqst *req, __be32 *p,
- const struct nsm_args *argp)
+static void nsm_xdr_enc_mon(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nsm_args *argp)
{
- struct xdr_stream xdr;
- int status;
-
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- status = encode_mon_id(&xdr, argp);
- if (unlikely(status))
- return status;
- return encode_priv(&xdr, argp);
+ encode_mon_id(xdr, argp);
+ encode_priv(xdr, argp);
}
-static int xdr_enc_unmon(struct rpc_rqst *req, __be32 *p,
- const struct nsm_args *argp)
+static void nsm_xdr_enc_unmon(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nsm_args *argp)
{
- struct xdr_stream xdr;
-
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- return encode_mon_id(&xdr, argp);
+ encode_mon_id(xdr, argp);
}
-static int xdr_dec_stat_res(struct rpc_rqst *rqstp, __be32 *p,
- struct nsm_res *resp)
+static int nsm_xdr_dec_stat_res(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nsm_res *resp)
{
- struct xdr_stream xdr;
+ __be32 *p;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- p = xdr_inline_decode(&xdr, 2 * sizeof(u32));
+ p = xdr_inline_decode(xdr, 4 + 4);
if (unlikely(p == NULL))
return -EIO;
- resp->status = ntohl(*p++);
- resp->state = ntohl(*p);
+ resp->status = be32_to_cpup(p++);
+ resp->state = be32_to_cpup(p);
- dprintk("lockd: xdr_dec_stat_res status %d state %d\n",
- resp->status, resp->state);
+ dprintk("lockd: %s status %d state %d\n",
+ __func__, resp->status, resp->state);
return 0;
}
-static int xdr_dec_stat(struct rpc_rqst *rqstp, __be32 *p,
- struct nsm_res *resp)
+static int nsm_xdr_dec_stat(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nsm_res *resp)
{
- struct xdr_stream xdr;
+ __be32 *p;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- p = xdr_inline_decode(&xdr, sizeof(u32));
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -EIO;
- resp->state = ntohl(*p);
+ resp->state = be32_to_cpup(p);
- dprintk("lockd: xdr_dec_stat state %d\n", resp->state);
+ dprintk("lockd: %s state %d\n", __func__, resp->state);
return 0;
}
@@ -542,8 +516,8 @@ static int xdr_dec_stat(struct rpc_rqst *rqstp, __be32 *p,
static struct rpc_procinfo nsm_procedures[] = {
[NSMPROC_MON] = {
.p_proc = NSMPROC_MON,
- .p_encode = (kxdrproc_t)xdr_enc_mon,
- .p_decode = (kxdrproc_t)xdr_dec_stat_res,
+ .p_encode = (kxdreproc_t)nsm_xdr_enc_mon,
+ .p_decode = (kxdrdproc_t)nsm_xdr_dec_stat_res,
.p_arglen = SM_mon_sz,
.p_replen = SM_monres_sz,
.p_statidx = NSMPROC_MON,
@@ -551,8 +525,8 @@ static struct rpc_procinfo nsm_procedures[] = {
},
[NSMPROC_UNMON] = {
.p_proc = NSMPROC_UNMON,
- .p_encode = (kxdrproc_t)xdr_enc_unmon,
- .p_decode = (kxdrproc_t)xdr_dec_stat,
+ .p_encode = (kxdreproc_t)nsm_xdr_enc_unmon,
+ .p_decode = (kxdrdproc_t)nsm_xdr_dec_stat,
.p_arglen = SM_mon_id_sz,
.p_replen = SM_unmonres_sz,
.p_statidx = NSMPROC_UNMON,
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index a336e83..9a41fdc 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -9,7 +9,6 @@
#include <linux/types.h>
#include <linux/time.h>
-#include <linux/smp_lock.h>
#include <linux/lockd/lockd.h>
#include <linux/lockd/share.h>
@@ -52,7 +51,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
return 0;
no_locks:
- nlm_release_host(host);
+ nlmsvc_release_host(host);
if (error)
return error;
return nlm_lck_denied_nolocks;
@@ -93,7 +92,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
else
dprintk("lockd: TEST4 status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
}
@@ -135,7 +134,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
else
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
}
@@ -165,7 +164,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = nlmsvc_cancel_blocked(file, &argp->lock);
dprintk("lockd: CANCEL status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
}
@@ -198,7 +197,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = nlmsvc_unlock(file, &argp->lock);
dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
}
@@ -230,7 +229,7 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
static void nlm4svc_callback_release(void *data)
{
- nlm_release_call(data);
+ nlmsvc_release_call(data);
}
static const struct rpc_call_ops nlm4svc_callback_ops = {
@@ -262,7 +261,7 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
stat = func(rqstp, argp, &call->a_res);
if (stat != 0) {
- nlm_release_call(call);
+ nlmsvc_release_call(call);
return stat;
}
@@ -335,7 +334,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = nlmsvc_share_file(host, file, argp);
dprintk("lockd: SHARE status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
}
@@ -368,7 +367,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = nlmsvc_unshare_file(host, file, argp);
dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
}
@@ -400,7 +399,7 @@ nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
return rpc_success;
nlmsvc_free_host_resources(host);
- nlm_release_host(host);
+ nlmsvc_release_host(host);
return rpc_success;
}
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index c462d34..6e31695 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -25,7 +25,6 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/sched.h>
-#include <linux/smp_lock.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svc.h>
#include <linux/lockd/nlm.h>
@@ -47,6 +46,7 @@ static void nlmsvc_remove_block(struct nlm_block *block);
static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
static void nlmsvc_freegrantargs(struct nlm_rqst *call);
static const struct rpc_call_ops nlmsvc_grant_ops;
+static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie);
/*
* The list of blocked locks to retry
@@ -234,7 +234,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
failed_free:
kfree(block);
failed:
- nlm_release_call(call);
+ nlmsvc_release_call(call);
return NULL;
}
@@ -267,7 +267,7 @@ static void nlmsvc_free_block(struct kref *kref)
mutex_unlock(&file->f_mutex);
nlmsvc_freegrantargs(block->b_call);
- nlm_release_call(block->b_call);
+ nlmsvc_release_call(block->b_call);
nlm_release_file(block->b_file);
kfree(block->b_fl);
kfree(block);
@@ -935,3 +935,32 @@ nlmsvc_retry_blocked(void)
return timeout;
}
+
+#ifdef RPC_DEBUG
+static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
+{
+ /*
+ * We can get away with a static buffer because we're only
+ * called with BKL held.
+ */
+ static char buf[2*NLM_MAXCOOKIELEN+1];
+ unsigned int i, len = sizeof(buf);
+ char *p = buf;
+
+ len--; /* allow for trailing \0 */
+ if (len < 3)
+ return "???";
+ for (i = 0 ; i < cookie->len ; i++) {
+ if (len < 2) {
+ strcpy(p-3, "...");
+ break;
+ }
+ sprintf(p, "%02x", cookie->data[i]);
+ p += 2;
+ len -= 2;
+ }
+ *p = '\0';
+
+ return buf;
+}
+#endif
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index c3069f3..d27aab1 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -9,7 +9,6 @@
#include <linux/types.h>
#include <linux/time.h>
-#include <linux/smp_lock.h>
#include <linux/lockd/lockd.h>
#include <linux/lockd/share.h>
@@ -81,7 +80,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
return 0;
no_locks:
- nlm_release_host(host);
+ nlmsvc_release_host(host);
if (error)
return error;
return nlm_lck_denied_nolocks;
@@ -123,7 +122,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
dprintk("lockd: TEST status %d vers %d\n",
ntohl(resp->status), rqstp->rq_vers);
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
}
@@ -165,7 +164,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
else
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
}
@@ -195,7 +194,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = cast_status(nlmsvc_cancel_blocked(file, &argp->lock));
dprintk("lockd: CANCEL status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
}
@@ -228,7 +227,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = cast_status(nlmsvc_unlock(file, &argp->lock));
dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
}
@@ -258,9 +257,17 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
-task->tk_status);
}
+void nlmsvc_release_call(struct nlm_rqst *call)
+{
+ if (!atomic_dec_and_test(&call->a_count))
+ return;
+ nlmsvc_release_host(call->a_host);
+ kfree(call);
+}
+
static void nlmsvc_callback_release(void *data)
{
- nlm_release_call(data);
+ nlmsvc_release_call(data);
}
static const struct rpc_call_ops nlmsvc_callback_ops = {
@@ -292,7 +299,7 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
stat = func(rqstp, argp, &call->a_res);
if (stat != 0) {
- nlm_release_call(call);
+ nlmsvc_release_call(call);
return stat;
}
@@ -367,7 +374,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = cast_status(nlmsvc_share_file(host, file, argp));
dprintk("lockd: SHARE status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
}
@@ -400,7 +407,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = cast_status(nlmsvc_unshare_file(host, file, argp));
dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
}
@@ -432,7 +439,7 @@ nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
return rpc_success;
nlmsvc_free_host_resources(host);
- nlm_release_host(host);
+ nlmsvc_release_host(host);
return rpc_success;
}
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index b583ab0..964666c 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -149,37 +149,6 @@ nlm_decode_lock(__be32 *p, struct nlm_lock *lock)
}
/*
- * Encode a lock as part of an NLM call
- */
-static __be32 *
-nlm_encode_lock(__be32 *p, struct nlm_lock *lock)
-{
- struct file_lock *fl = &lock->fl;
- __s32 start, len;
-
- if (!(p = xdr_encode_string(p, lock->caller))
- || !(p = nlm_encode_fh(p, &lock->fh))
- || !(p = nlm_encode_oh(p, &lock->oh)))
- return NULL;
-
- if (fl->fl_start > NLM_OFFSET_MAX
- || (fl->fl_end > NLM_OFFSET_MAX && fl->fl_end != OFFSET_MAX))
- return NULL;
-
- start = loff_t_to_s32(fl->fl_start);
- if (fl->fl_end == OFFSET_MAX)
- len = 0;
- else
- len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
-
- *p++ = htonl(lock->svid);
- *p++ = htonl(start);
- *p++ = htonl(len);
-
- return p;
-}
-
-/*
* Encode result of a TEST/TEST_MSG call
*/
static __be32 *
@@ -372,259 +341,3 @@ nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
{
return xdr_ressize_check(rqstp, p);
}
-
-/*
- * Now, the client side XDR functions
- */
-#ifdef NLMCLNT_SUPPORT_SHARES
-static int
-nlmclt_decode_void(struct rpc_rqst *req, u32 *p, void *ptr)
-{
- return 0;
-}
-#endif
-
-static int
-nlmclt_encode_testargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
- struct nlm_lock *lock = &argp->lock;
-
- if (!(p = nlm_encode_cookie(p, &argp->cookie)))
- return -EIO;
- *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
- if (!(p = nlm_encode_lock(p, lock)))
- return -EIO;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlmclt_decode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
- if (!(p = nlm_decode_cookie(p, &resp->cookie)))
- return -EIO;
- resp->status = *p++;
- if (resp->status == nlm_lck_denied) {
- struct file_lock *fl = &resp->lock.fl;
- u32 excl;
- s32 start, len, end;
-
- memset(&resp->lock, 0, sizeof(resp->lock));
- locks_init_lock(fl);
- excl = ntohl(*p++);
- resp->lock.svid = ntohl(*p++);
- fl->fl_pid = (pid_t)resp->lock.svid;
- if (!(p = nlm_decode_oh(p, &resp->lock.oh)))
- return -EIO;
-
- fl->fl_flags = FL_POSIX;
- fl->fl_type = excl? F_WRLCK : F_RDLCK;
- start = ntohl(*p++);
- len = ntohl(*p++);
- end = start + len - 1;
-
- fl->fl_start = s32_to_loff_t(start);
- if (len == 0 || end < 0)
- fl->fl_end = OFFSET_MAX;
- else
- fl->fl_end = s32_to_loff_t(end);
- }
- return 0;
-}
-
-
-static int
-nlmclt_encode_lockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
- struct nlm_lock *lock = &argp->lock;
-
- if (!(p = nlm_encode_cookie(p, &argp->cookie)))
- return -EIO;
- *p++ = argp->block? xdr_one : xdr_zero;
- *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
- if (!(p = nlm_encode_lock(p, lock)))
- return -EIO;
- *p++ = argp->reclaim? xdr_one : xdr_zero;
- *p++ = htonl(argp->state);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlmclt_encode_cancargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
- struct nlm_lock *lock = &argp->lock;
-
- if (!(p = nlm_encode_cookie(p, &argp->cookie)))
- return -EIO;
- *p++ = argp->block? xdr_one : xdr_zero;
- *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
- if (!(p = nlm_encode_lock(p, lock)))
- return -EIO;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlmclt_encode_unlockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
- struct nlm_lock *lock = &argp->lock;
-
- if (!(p = nlm_encode_cookie(p, &argp->cookie)))
- return -EIO;
- if (!(p = nlm_encode_lock(p, lock)))
- return -EIO;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlmclt_encode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
- if (!(p = nlm_encode_cookie(p, &resp->cookie)))
- return -EIO;
- *p++ = resp->status;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlmclt_encode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
- if (!(p = nlm_encode_testres(p, resp)))
- return -EIO;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
- if (!(p = nlm_decode_cookie(p, &resp->cookie)))
- return -EIO;
- resp->status = *p++;
- return 0;
-}
-
-#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
-# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
-#endif
-
-/*
- * Buffer requirements for NLM
- */
-#define NLM_void_sz 0
-#define NLM_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN)
-#define NLM_caller_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
-#define NLM_owner_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
-#define NLM_fhandle_sz 1+XDR_QUADLEN(NFS2_FHSIZE)
-#define NLM_lock_sz 3+NLM_caller_sz+NLM_owner_sz+NLM_fhandle_sz
-#define NLM_holder_sz 4+NLM_owner_sz
-
-#define NLM_testargs_sz NLM_cookie_sz+1+NLM_lock_sz
-#define NLM_lockargs_sz NLM_cookie_sz+4+NLM_lock_sz
-#define NLM_cancargs_sz NLM_cookie_sz+2+NLM_lock_sz
-#define NLM_unlockargs_sz NLM_cookie_sz+NLM_lock_sz
-
-#define NLM_testres_sz NLM_cookie_sz+1+NLM_holder_sz
-#define NLM_res_sz NLM_cookie_sz+1
-#define NLM_norep_sz 0
-
-/*
- * For NLM, a void procedure really returns nothing
- */
-#define nlmclt_decode_norep NULL
-
-#define PROC(proc, argtype, restype) \
-[NLMPROC_##proc] = { \
- .p_proc = NLMPROC_##proc, \
- .p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \
- .p_decode = (kxdrproc_t) nlmclt_decode_##restype, \
- .p_arglen = NLM_##argtype##_sz, \
- .p_replen = NLM_##restype##_sz, \
- .p_statidx = NLMPROC_##proc, \
- .p_name = #proc, \
- }
-
-static struct rpc_procinfo nlm_procedures[] = {
- PROC(TEST, testargs, testres),
- PROC(LOCK, lockargs, res),
- PROC(CANCEL, cancargs, res),
- PROC(UNLOCK, unlockargs, res),
- PROC(GRANTED, testargs, res),
- PROC(TEST_MSG, testargs, norep),
- PROC(LOCK_MSG, lockargs, norep),
- PROC(CANCEL_MSG, cancargs, norep),
- PROC(UNLOCK_MSG, unlockargs, norep),
- PROC(GRANTED_MSG, testargs, norep),
- PROC(TEST_RES, testres, norep),
- PROC(LOCK_RES, res, norep),
- PROC(CANCEL_RES, res, norep),
- PROC(UNLOCK_RES, res, norep),
- PROC(GRANTED_RES, res, norep),
-#ifdef NLMCLNT_SUPPORT_SHARES
- PROC(SHARE, shareargs, shareres),
- PROC(UNSHARE, shareargs, shareres),
- PROC(NM_LOCK, lockargs, res),
- PROC(FREE_ALL, notify, void),
-#endif
-};
-
-static struct rpc_version nlm_version1 = {
- .number = 1,
- .nrprocs = 16,
- .procs = nlm_procedures,
-};
-
-static struct rpc_version nlm_version3 = {
- .number = 3,
- .nrprocs = 24,
- .procs = nlm_procedures,
-};
-
-static struct rpc_version * nlm_versions[] = {
- [1] = &nlm_version1,
- [3] = &nlm_version3,
-#ifdef CONFIG_LOCKD_V4
- [4] = &nlm_version4,
-#endif
-};
-
-static struct rpc_stat nlm_stats;
-
-struct rpc_program nlm_program = {
- .name = "lockd",
- .number = NLM_PROGRAM,
- .nrvers = ARRAY_SIZE(nlm_versions),
- .version = nlm_versions,
- .stats = &nlm_stats,
-};
-
-#ifdef RPC_DEBUG
-const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
-{
- /*
- * We can get away with a static buffer because we're only
- * called with BKL held.
- */
- static char buf[2*NLM_MAXCOOKIELEN+1];
- unsigned int i, len = sizeof(buf);
- char *p = buf;
-
- len--; /* allow for trailing \0 */
- if (len < 3)
- return "???";
- for (i = 0 ; i < cookie->len ; i++) {
- if (len < 2) {
- strcpy(p-3, "...");
- break;
- }
- sprintf(p, "%02x", cookie->data[i]);
- p += 2;
- len -= 2;
- }
- *p = '\0';
-
- return buf;
-}
-#endif
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index ad9dbbc..dfa4789 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -93,15 +93,6 @@ nlm4_decode_fh(__be32 *p, struct nfs_fh *f)
return p + XDR_QUADLEN(f->size);
}
-static __be32 *
-nlm4_encode_fh(__be32 *p, struct nfs_fh *f)
-{
- *p++ = htonl(f->size);
- if (f->size) p[XDR_QUADLEN(f->size)-1] = 0; /* don't leak anything */
- memcpy(p, f->data, f->size);
- return p + XDR_QUADLEN(f->size);
-}
-
/*
* Encode and decode owner handle
*/
@@ -112,12 +103,6 @@ nlm4_decode_oh(__be32 *p, struct xdr_netobj *oh)
}
static __be32 *
-nlm4_encode_oh(__be32 *p, struct xdr_netobj *oh)
-{
- return xdr_encode_netobj(p, oh);
-}
-
-static __be32 *
nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
{
struct file_lock *fl = &lock->fl;
@@ -150,38 +135,6 @@ nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
}
/*
- * Encode a lock as part of an NLM call
- */
-static __be32 *
-nlm4_encode_lock(__be32 *p, struct nlm_lock *lock)
-{
- struct file_lock *fl = &lock->fl;
- __s64 start, len;
-
- if (!(p = xdr_encode_string(p, lock->caller))
- || !(p = nlm4_encode_fh(p, &lock->fh))
- || !(p = nlm4_encode_oh(p, &lock->oh)))
- return NULL;
-
- if (fl->fl_start > NLM4_OFFSET_MAX
- || (fl->fl_end > NLM4_OFFSET_MAX && fl->fl_end != OFFSET_MAX))
- return NULL;
-
- *p++ = htonl(lock->svid);
-
- start = loff_t_to_s64(fl->fl_start);
- if (fl->fl_end == OFFSET_MAX)
- len = 0;
- else
- len = loff_t_to_s64(fl->fl_end - fl->fl_start + 1);
-
- p = xdr_encode_hyper(p, start);
- p = xdr_encode_hyper(p, len);
-
- return p;
-}
-
-/*
* Encode result of a TEST/TEST_MSG call
*/
static __be32 *
@@ -379,211 +332,3 @@ nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
{
return xdr_ressize_check(rqstp, p);
}
-
-/*
- * Now, the client side XDR functions
- */
-#ifdef NLMCLNT_SUPPORT_SHARES
-static int
-nlm4clt_decode_void(struct rpc_rqst *req, __be32 *p, void *ptr)
-{
- return 0;
-}
-#endif
-
-static int
-nlm4clt_encode_testargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
- struct nlm_lock *lock = &argp->lock;
-
- if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
- return -EIO;
- *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
- if (!(p = nlm4_encode_lock(p, lock)))
- return -EIO;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlm4clt_decode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
- if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
- return -EIO;
- resp->status = *p++;
- if (resp->status == nlm_lck_denied) {
- struct file_lock *fl = &resp->lock.fl;
- u32 excl;
- __u64 start, len;
- __s64 end;
-
- memset(&resp->lock, 0, sizeof(resp->lock));
- locks_init_lock(fl);
- excl = ntohl(*p++);
- resp->lock.svid = ntohl(*p++);
- fl->fl_pid = (pid_t)resp->lock.svid;
- if (!(p = nlm4_decode_oh(p, &resp->lock.oh)))
- return -EIO;
-
- fl->fl_flags = FL_POSIX;
- fl->fl_type = excl? F_WRLCK : F_RDLCK;
- p = xdr_decode_hyper(p, &start);
- p = xdr_decode_hyper(p, &len);
- end = start + len - 1;
-
- fl->fl_start = s64_to_loff_t(start);
- if (len == 0 || end < 0)
- fl->fl_end = OFFSET_MAX;
- else
- fl->fl_end = s64_to_loff_t(end);
- }
- return 0;
-}
-
-
-static int
-nlm4clt_encode_lockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
- struct nlm_lock *lock = &argp->lock;
-
- if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
- return -EIO;
- *p++ = argp->block? xdr_one : xdr_zero;
- *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
- if (!(p = nlm4_encode_lock(p, lock)))
- return -EIO;
- *p++ = argp->reclaim? xdr_one : xdr_zero;
- *p++ = htonl(argp->state);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlm4clt_encode_cancargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
- struct nlm_lock *lock = &argp->lock;
-
- if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
- return -EIO;
- *p++ = argp->block? xdr_one : xdr_zero;
- *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
- if (!(p = nlm4_encode_lock(p, lock)))
- return -EIO;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlm4clt_encode_unlockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
- struct nlm_lock *lock = &argp->lock;
-
- if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
- return -EIO;
- if (!(p = nlm4_encode_lock(p, lock)))
- return -EIO;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlm4clt_encode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
- if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
- return -EIO;
- *p++ = resp->status;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlm4clt_encode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
- if (!(p = nlm4_encode_testres(p, resp)))
- return -EIO;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
- if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
- return -EIO;
- resp->status = *p++;
- return 0;
-}
-
-#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
-# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
-#endif
-
-#if (NLMCLNT_OHSIZE > NLM_MAXSTRLEN)
-# error "NLM host name cannot be larger than NLM's maximum string length!"
-#endif
-
-/*
- * Buffer requirements for NLM
- */
-#define NLM4_void_sz 0
-#define NLM4_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN)
-#define NLM4_caller_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
-#define NLM4_owner_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
-#define NLM4_fhandle_sz 1+XDR_QUADLEN(NFS3_FHSIZE)
-#define NLM4_lock_sz 5+NLM4_caller_sz+NLM4_owner_sz+NLM4_fhandle_sz
-#define NLM4_holder_sz 6+NLM4_owner_sz
-
-#define NLM4_testargs_sz NLM4_cookie_sz+1+NLM4_lock_sz
-#define NLM4_lockargs_sz NLM4_cookie_sz+4+NLM4_lock_sz
-#define NLM4_cancargs_sz NLM4_cookie_sz+2+NLM4_lock_sz
-#define NLM4_unlockargs_sz NLM4_cookie_sz+NLM4_lock_sz
-
-#define NLM4_testres_sz NLM4_cookie_sz+1+NLM4_holder_sz
-#define NLM4_res_sz NLM4_cookie_sz+1
-#define NLM4_norep_sz 0
-
-/*
- * For NLM, a void procedure really returns nothing
- */
-#define nlm4clt_decode_norep NULL
-
-#define PROC(proc, argtype, restype) \
-[NLMPROC_##proc] = { \
- .p_proc = NLMPROC_##proc, \
- .p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \
- .p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \
- .p_arglen = NLM4_##argtype##_sz, \
- .p_replen = NLM4_##restype##_sz, \
- .p_statidx = NLMPROC_##proc, \
- .p_name = #proc, \
- }
-
-static struct rpc_procinfo nlm4_procedures[] = {
- PROC(TEST, testargs, testres),
- PROC(LOCK, lockargs, res),
- PROC(CANCEL, cancargs, res),
- PROC(UNLOCK, unlockargs, res),
- PROC(GRANTED, testargs, res),
- PROC(TEST_MSG, testargs, norep),
- PROC(LOCK_MSG, lockargs, norep),
- PROC(CANCEL_MSG, cancargs, norep),
- PROC(UNLOCK_MSG, unlockargs, norep),
- PROC(GRANTED_MSG, testargs, norep),
- PROC(TEST_RES, testres, norep),
- PROC(LOCK_RES, res, norep),
- PROC(CANCEL_RES, res, norep),
- PROC(UNLOCK_RES, res, norep),
- PROC(GRANTED_RES, res, norep),
-#ifdef NLMCLNT_SUPPORT_SHARES
- PROC(SHARE, shareargs, shareres),
- PROC(UNSHARE, shareargs, shareres),
- PROC(NM_LOCK, lockargs, res),
- PROC(FREE_ALL, notify, void),
-#endif
-};
-
-struct rpc_version nlm_version4 = {
- .number = 4,
- .nrprocs = 24,
- .procs = nlm4_procedures,
-};
diff --git a/fs/locks.c b/fs/locks.c
index 50ec159..08415b2 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -122,7 +122,6 @@
#include <linux/module.h>
#include <linux/security.h>
#include <linux/slab.h>
-#include <linux/smp_lock.h>
#include <linux/syscalls.h>
#include <linux/time.h>
#include <linux/rcupdate.h>
@@ -186,7 +185,7 @@ void locks_release_private(struct file_lock *fl)
EXPORT_SYMBOL_GPL(locks_release_private);
/* Free a lock which is not in use. */
-static void locks_free_lock(struct file_lock *fl)
+void locks_free_lock(struct file_lock *fl)
{
BUG_ON(waitqueue_active(&fl->fl_wait));
BUG_ON(!list_empty(&fl->fl_block));
@@ -195,6 +194,7 @@ static void locks_free_lock(struct file_lock *fl)
locks_release_private(fl);
kmem_cache_free(filelock_cache, fl);
}
+EXPORT_SYMBOL(locks_free_lock);
void locks_init_lock(struct file_lock *fl)
{
@@ -234,11 +234,8 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
fl->fl_ops->fl_copy_lock(new, fl);
new->fl_ops = fl->fl_ops;
}
- if (fl->fl_lmops) {
- if (fl->fl_lmops->fl_copy_lock)
- fl->fl_lmops->fl_copy_lock(new, fl);
+ if (fl->fl_lmops)
new->fl_lmops = fl->fl_lmops;
- }
}
/*
@@ -1371,26 +1368,28 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
struct inode *inode = dentry->d_inode;
int error, rdlease_count = 0, wrlease_count = 0;
+ lease = *flp;
+
+ error = -EACCES;
if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE))
- return -EACCES;
+ goto out;
+ error = -EINVAL;
if (!S_ISREG(inode->i_mode))
- return -EINVAL;
+ goto out;
error = security_file_lock(filp, arg);
if (error)
- return error;
+ goto out;
time_out_leases(inode);
BUG_ON(!(*flp)->fl_lmops->fl_break);
- lease = *flp;
-
if (arg != F_UNLCK) {
error = -EAGAIN;
if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
goto out;
if ((arg == F_WRLCK)
- && ((atomic_read(&dentry->d_count) > 1)
+ && ((dentry->d_count > 1)
|| (atomic_read(&inode->i_count) > 1)))
goto out;
}
@@ -1425,8 +1424,9 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
goto out;
if (my_before != NULL) {
- *flp = *my_before;
error = lease->fl_lmops->fl_change(my_before, arg);
+ if (!error)
+ *flp = *my_before;
goto out;
}
@@ -1441,7 +1441,6 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
return 0;
out:
- locks_free_lock(lease);
return error;
}
EXPORT_SYMBOL(generic_setlease);
@@ -1493,21 +1492,19 @@ int vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
}
EXPORT_SYMBOL_GPL(vfs_setlease);
-/**
- * fcntl_setlease - sets a lease on an open file
- * @fd: open file descriptor
- * @filp: file pointer
- * @arg: type of lease to obtain
- *
- * Call this fcntl to establish a lease on the file.
- * Note that you also need to call %F_SETSIG to
- * receive a signal when the lease is broken.
- */
-int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
+static int do_fcntl_delete_lease(struct file *filp)
{
- struct file_lock *fl;
+ struct file_lock fl, *flp = &fl;
+
+ lease_init(filp, F_UNLCK, flp);
+
+ return vfs_setlease(filp, F_UNLCK, &flp);
+}
+
+static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
+{
+ struct file_lock *fl, *ret;
struct fasync_struct *new;
- struct inode *inode = filp->f_path.dentry->d_inode;
int error;
fl = lease_alloc(filp, arg);
@@ -1519,10 +1516,16 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
locks_free_lock(fl);
return -ENOMEM;
}
+ ret = fl;
lock_flocks();
- error = __vfs_setlease(filp, arg, &fl);
- if (error || arg == F_UNLCK)
- goto out_unlock;
+ error = __vfs_setlease(filp, arg, &ret);
+ if (error) {
+ unlock_flocks();
+ locks_free_lock(fl);
+ goto out_free_fasync;
+ }
+ if (ret != fl)
+ locks_free_lock(fl);
/*
* fasync_insert_entry() returns the old entry if any.
@@ -1530,26 +1533,36 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
* inserted it into the fasync list. Clear new so that
* we don't release it here.
*/
- if (!fasync_insert_entry(fd, filp, &fl->fl_fasync, new))
+ if (!fasync_insert_entry(fd, filp, &ret->fl_fasync, new))
new = NULL;
- if (error < 0) {
- /* remove lease just inserted by setlease */
- fl->fl_type = F_UNLCK | F_INPROGRESS;
- fl->fl_break_time = jiffies - 10;
- time_out_leases(inode);
- goto out_unlock;
- }
-
error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
-out_unlock:
unlock_flocks();
+
+out_free_fasync:
if (new)
fasync_free(new);
return error;
}
/**
+ * fcntl_setlease - sets a lease on an open file
+ * @fd: open file descriptor
+ * @filp: file pointer
+ * @arg: type of lease to obtain
+ *
+ * Call this fcntl to establish a lease on the file.
+ * Note that you also need to call %F_SETSIG to
+ * receive a signal when the lease is broken.
+ */
+int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
+{
+ if (arg == F_UNLCK)
+ return do_fcntl_delete_lease(filp);
+ return do_fcntl_add_lease(fd, filp, arg);
+}
+
+/**
* flock_lock_file_wait - Apply a FLOCK-style lock to a file
* @filp: The file to apply the lock to
* @fl: The lock to be applied
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 9bd2ce2..92ca6fb 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -298,9 +298,9 @@ static int bdev_write_sb(struct super_block *sb, struct page *page)
return sync_request(page, bdev, WRITE);
}
-static void bdev_put_device(struct super_block *sb)
+static void bdev_put_device(struct logfs_super *s)
{
- close_bdev_exclusive(logfs_super(sb)->s_bdev, FMODE_READ|FMODE_WRITE);
+ close_bdev_exclusive(s->s_bdev, FMODE_READ|FMODE_WRITE);
}
static int bdev_can_write_buf(struct super_block *sb, u64 ofs)
@@ -320,8 +320,8 @@ static const struct logfs_device_ops bd_devops = {
.put_device = bdev_put_device,
};
-int logfs_get_sb_bdev(struct file_system_type *type, int flags,
- const char *devname, struct vfsmount *mnt)
+int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type,
+ const char *devname)
{
struct block_device *bdev;
@@ -332,8 +332,11 @@ int logfs_get_sb_bdev(struct file_system_type *type, int flags,
if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
int mtdnr = MINOR(bdev->bd_dev);
close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
- return logfs_get_sb_mtd(type, flags, mtdnr, mnt);
+ return logfs_get_sb_mtd(p, mtdnr);
}
- return logfs_get_sb_device(type, flags, NULL, bdev, &bd_devops, mnt);
+ p->s_bdev = bdev;
+ p->s_mtd = NULL;
+ p->s_devops = &bd_devops;
+ return 0;
}
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index a85d47d..7466e9d 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -230,9 +230,9 @@ static void mtd_writeseg(struct super_block *sb, u64 ofs, size_t len)
__mtd_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
}
-static void mtd_put_device(struct super_block *sb)
+static void mtd_put_device(struct logfs_super *s)
{
- put_mtd_device(logfs_super(sb)->s_mtd);
+ put_mtd_device(s->s_mtd);
}
static int mtd_can_write_buf(struct super_block *sb, u64 ofs)
@@ -265,14 +265,14 @@ static const struct logfs_device_ops mtd_devops = {
.put_device = mtd_put_device,
};
-int logfs_get_sb_mtd(struct file_system_type *type, int flags,
- int mtdnr, struct vfsmount *mnt)
+int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
{
- struct mtd_info *mtd;
- const struct logfs_device_ops *devops = &mtd_devops;
-
- mtd = get_mtd_device(NULL, mtdnr);
+ struct mtd_info *mtd = get_mtd_device(NULL, mtdnr);
if (IS_ERR(mtd))
return PTR_ERR(mtd);
- return logfs_get_sb_device(type, flags, mtd, NULL, devops, mnt);
+
+ s->s_bdev = NULL;
+ s->s_mtd = mtd;
+ s->s_devops = &mtd_devops;
+ return 0;
}
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 409dfd6..f9ddf0c 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -555,9 +555,11 @@ static int logfs_symlink(struct inode *dir, struct dentry *dentry,
return __logfs_create(dir, dentry, inode, target, destlen);
}
-static int logfs_permission(struct inode *inode, int mask)
+static int logfs_permission(struct inode *inode, int mask, unsigned int flags)
{
- return generic_permission(inode, mask, NULL);
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+ return generic_permission(inode, mask, flags, NULL);
}
static int logfs_link(struct dentry *old_dentry, struct inode *dir,
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index d8c71ec..03b8c24 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -141,13 +141,20 @@ struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *is_cached)
return __logfs_iget(sb, ino);
}
+static void logfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(logfs_inode_cache, logfs_inode(inode));
+}
+
static void __logfs_destroy_inode(struct inode *inode)
{
struct logfs_inode *li = logfs_inode(inode);
BUG_ON(li->li_block);
list_del(&li->li_freeing_list);
- kmem_cache_free(logfs_inode_cache, li);
+ call_rcu(&inode->i_rcu, logfs_i_callback);
}
static void logfs_destroy_inode(struct inode *inode)
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index f46ee8b..9da2970 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -828,7 +828,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
super->s_journal_seg[i] = segno;
super->s_journal_ec[i] = ec;
logfs_set_segment_reserved(sb, segno);
- err = btree_insert32(head, segno, (void *)1, GFP_KERNEL);
+ err = btree_insert32(head, segno, (void *)1, GFP_NOFS);
BUG_ON(err); /* mempool should prevent this */
err = logfs_erase_segment(sb, segno, 1);
BUG_ON(err); /* FIXME: remount-ro would be nicer */
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index b878626..57afd4a 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -136,6 +136,7 @@ struct logfs_area_ops {
int (*erase_segment)(struct logfs_area *area);
};
+struct logfs_super; /* forward */
/**
* struct logfs_device_ops - device access operations
*
@@ -156,7 +157,7 @@ struct logfs_device_ops {
int ensure_write);
int (*can_write_buf)(struct super_block *sb, u64 ofs);
void (*sync)(struct super_block *sb);
- void (*put_device)(struct super_block *sb);
+ void (*put_device)(struct logfs_super *s);
};
/**
@@ -471,11 +472,13 @@ void logfs_compr_exit(void);
/* dev_bdev.c */
#ifdef CONFIG_BLOCK
-int logfs_get_sb_bdev(struct file_system_type *type, int flags,
- const char *devname, struct vfsmount *mnt);
+int logfs_get_sb_bdev(struct logfs_super *s,
+ struct file_system_type *type,
+ const char *devname);
#else
-static inline int logfs_get_sb_bdev(struct file_system_type *type, int flags,
- const char *devname, struct vfsmount *mnt)
+static inline int logfs_get_sb_bdev(struct logfs_super *s,
+ struct file_system_type *type,
+ const char *devname)
{
return -ENODEV;
}
@@ -483,11 +486,9 @@ static inline int logfs_get_sb_bdev(struct file_system_type *type, int flags,
/* dev_mtd.c */
#ifdef CONFIG_MTD
-int logfs_get_sb_mtd(struct file_system_type *type, int flags,
- int mtdnr, struct vfsmount *mnt);
+int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr);
#else
-static inline int logfs_get_sb_mtd(struct file_system_type *type, int flags,
- int mtdnr, struct vfsmount *mnt)
+static inline int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
{
return -ENODEV;
}
@@ -619,9 +620,6 @@ void emergency_read_end(struct page *page);
void logfs_crash_dump(struct super_block *sb);
void *memchr_inv(const void *s, int c, size_t n);
int logfs_statfs(struct dentry *dentry, struct kstatfs *stats);
-int logfs_get_sb_device(struct file_system_type *type, int flags,
- struct mtd_info *mtd, struct block_device *bdev,
- const struct logfs_device_ops *devops, struct vfsmount *mnt);
int logfs_check_ds(struct logfs_disk_super *ds);
int logfs_write_sb(struct super_block *sb);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 6127baf..ee99a9f 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1994,6 +1994,9 @@ static int do_write_inode(struct inode *inode)
/* FIXME: transaction is part of logfs_block now. Is that enough? */
err = logfs_write_buf(master_inode, page, 0);
+ if (err)
+ move_page_to_inode(inode, page);
+
logfs_put_write_page(page);
return err;
}
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 5336155..33435e4 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -325,7 +325,7 @@ static int logfs_make_writeable(struct super_block *sb)
return 0;
}
-static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
+static int logfs_get_sb_final(struct super_block *sb)
{
struct logfs_super *super = logfs_super(sb);
struct inode *rootdir;
@@ -356,7 +356,6 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
}
log_super("LogFS: Finished mounting\n");
- simple_set_mnt(mnt, sb);
return 0;
fail:
@@ -529,43 +528,37 @@ static void logfs_kill_sb(struct super_block *sb)
logfs_cleanup_rw(sb);
if (super->s_erase_page)
__free_page(super->s_erase_page);
- super->s_devops->put_device(sb);
+ super->s_devops->put_device(super);
logfs_mempool_destroy(super->s_btree_pool);
logfs_mempool_destroy(super->s_alias_pool);
kfree(super);
log_super("LogFS: Finished unmounting\n");
}
-int logfs_get_sb_device(struct file_system_type *type, int flags,
- struct mtd_info *mtd, struct block_device *bdev,
- const struct logfs_device_ops *devops, struct vfsmount *mnt)
+static struct dentry *logfs_get_sb_device(struct logfs_super *super,
+ struct file_system_type *type, int flags)
{
- struct logfs_super *super;
struct super_block *sb;
int err = -ENOMEM;
static int mount_count;
log_super("LogFS: Start mount %x\n", mount_count++);
- super = kzalloc(sizeof(*super), GFP_KERNEL);
- if (!super)
- goto err0;
- super->s_mtd = mtd;
- super->s_bdev = bdev;
err = -EINVAL;
sb = sget(type, logfs_sb_test, logfs_sb_set, super);
- if (IS_ERR(sb))
- goto err0;
+ if (IS_ERR(sb)) {
+ super->s_devops->put_device(super);
+ kfree(super);
+ return ERR_CAST(sb);
+ }
if (sb->s_root) {
/* Device is already in use */
- err = 0;
- simple_set_mnt(mnt, sb);
- goto err0;
+ super->s_devops->put_device(super);
+ kfree(super);
+ return dget(sb->s_root);
}
- super->s_devops = devops;
-
/*
* sb->s_maxbytes is limited to 8TB. On 32bit systems, the page cache
* only covers 16TB and the upper 8TB are used for indirect blocks.
@@ -581,10 +574,12 @@ int logfs_get_sb_device(struct file_system_type *type, int flags,
goto err1;
sb->s_flags |= MS_ACTIVE;
- err = logfs_get_sb_final(sb, mnt);
- if (err)
+ err = logfs_get_sb_final(sb);
+ if (err) {
deactivate_locked_super(sb);
- return err;
+ return ERR_PTR(err);
+ }
+ return dget(sb->s_root);
err1:
/* no ->s_root, no ->put_super() */
@@ -592,37 +587,45 @@ err1:
iput(super->s_segfile_inode);
iput(super->s_mapping_inode);
deactivate_locked_super(sb);
- return err;
-err0:
- kfree(super);
- //devops->put_device(sb);
- return err;
+ return ERR_PTR(err);
}
-static int logfs_get_sb(struct file_system_type *type, int flags,
- const char *devname, void *data, struct vfsmount *mnt)
+static struct dentry *logfs_mount(struct file_system_type *type, int flags,
+ const char *devname, void *data)
{
ulong mtdnr;
+ struct logfs_super *super;
+ int err;
- if (!devname)
- return logfs_get_sb_bdev(type, flags, devname, mnt);
- if (strncmp(devname, "mtd", 3))
- return logfs_get_sb_bdev(type, flags, devname, mnt);
+ super = kzalloc(sizeof(*super), GFP_KERNEL);
+ if (!super)
+ return ERR_PTR(-ENOMEM);
- {
+ if (!devname)
+ err = logfs_get_sb_bdev(super, type, devname);
+ else if (strncmp(devname, "mtd", 3))
+ err = logfs_get_sb_bdev(super, type, devname);
+ else {
char *garbage;
mtdnr = simple_strtoul(devname+3, &garbage, 0);
if (*garbage)
- return -EINVAL;
+ err = -EINVAL;
+ else
+ err = logfs_get_sb_mtd(super, mtdnr);
+ }
+
+ if (err) {
+ kfree(super);
+ return ERR_PTR(err);
}
- return logfs_get_sb_mtd(type, flags, mtdnr, mnt);
+ return logfs_get_sb_device(super, type, flags);
}
static struct file_system_type logfs_fs_type = {
.owner = THIS_MODULE,
.name = "logfs",
- .get_sb = logfs_get_sb,
+ .mount = logfs_mount,
.kill_sb = logfs_kill_sb,
.fs_flags = FS_REQUIRES_DEV,
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 9344474..a25444ab 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -76,18 +76,6 @@ EXPORT_SYMBOL(mb_cache_entry_find_first);
EXPORT_SYMBOL(mb_cache_entry_find_next);
#endif
-struct mb_cache {
- struct list_head c_cache_list;
- const char *c_name;
- atomic_t c_entry_count;
- int c_max_entries;
- int c_bucket_bits;
- struct kmem_cache *c_entry_cache;
- struct list_head *c_block_hash;
- struct list_head *c_index_hash;
-};
-
-
/*
* Global data: list of all mbcache's, lru list, and a spinlock for
* accessing cache data structures on SMP machines. The lru list is
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index e39d6bf..ae0b83f 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -68,11 +68,18 @@ static struct inode *minix_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void minix_destroy_inode(struct inode *inode)
+static void minix_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(minix_inode_cachep, minix_i(inode));
}
+static void minix_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, minix_i_callback);
+}
+
static void init_once(void *foo)
{
struct minix_inode_info *ei = (struct minix_inode_info *) foo;
@@ -614,17 +621,16 @@ void minix_truncate(struct inode * inode)
V2_minix_truncate(inode);
}
-static int minix_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *minix_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, minix_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, minix_fill_super);
}
static struct file_system_type minix_fs_type = {
.owner = THIS_MODULE,
.name = "minix",
- .get_sb = minix_get_sb,
+ .mount = minix_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index c0d35a3..1b9e077 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -23,7 +23,7 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, st
struct inode * inode = NULL;
ino_t ino;
- dentry->d_op = dir->i_sb->s_root->d_op;
+ d_set_d_op(dentry, dir->i_sb->s_root->d_op);
if (dentry->d_name.len > minix_sb(dir->i_sb)->s_namelen)
return ERR_PTR(-ENAMETOOLONG);
diff --git a/fs/namei.c b/fs/namei.c
index f7dbc06..24ece10 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -169,8 +169,8 @@ EXPORT_SYMBOL(putname);
/*
* This does basic POSIX ACL permission checking
*/
-static int acl_permission_check(struct inode *inode, int mask,
- int (*check_acl)(struct inode *inode, int mask))
+static int acl_permission_check(struct inode *inode, int mask, unsigned int flags,
+ int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
{
umode_t mode = inode->i_mode;
@@ -180,7 +180,7 @@ static int acl_permission_check(struct inode *inode, int mask,
mode >>= 6;
else {
if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
- int error = check_acl(inode, mask);
+ int error = check_acl(inode, mask, flags);
if (error != -EAGAIN)
return error;
}
@@ -198,25 +198,30 @@ static int acl_permission_check(struct inode *inode, int mask,
}
/**
- * generic_permission - check for access rights on a Posix-like filesystem
+ * generic_permission - check for access rights on a Posix-like filesystem
* @inode: inode to check access rights for
* @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
* @check_acl: optional callback to check for Posix ACLs
+ * @flags: IPERM_FLAG_ flags.
*
* Used to check for read/write/execute permissions on a file.
* We use "fsuid" for this, letting us set arbitrary permissions
* for filesystem access without changing the "normal" uids which
- * are used for other things..
+ * are used for other things.
+ *
+ * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
+ * request cannot be satisfied (eg. requires blocking or too much complexity).
+ * It would then be called again in ref-walk mode.
*/
-int generic_permission(struct inode *inode, int mask,
- int (*check_acl)(struct inode *inode, int mask))
+int generic_permission(struct inode *inode, int mask, unsigned int flags,
+ int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
{
int ret;
/*
* Do the basic POSIX ACL permission checks.
*/
- ret = acl_permission_check(inode, mask, check_acl);
+ ret = acl_permission_check(inode, mask, flags, check_acl);
if (ret != -EACCES)
return ret;
@@ -271,9 +276,10 @@ int inode_permission(struct inode *inode, int mask)
}
if (inode->i_op->permission)
- retval = inode->i_op->permission(inode, mask);
+ retval = inode->i_op->permission(inode, mask, 0);
else
- retval = generic_permission(inode, mask, inode->i_op->check_acl);
+ retval = generic_permission(inode, mask, 0,
+ inode->i_op->check_acl);
if (retval)
return retval;
@@ -362,6 +368,18 @@ void path_get(struct path *path)
EXPORT_SYMBOL(path_get);
/**
+ * path_get_long - get a long reference to a path
+ * @path: path to get the reference to
+ *
+ * Given a path increment the reference count to the dentry and the vfsmount.
+ */
+void path_get_long(struct path *path)
+{
+ mntget_long(path->mnt);
+ dget(path->dentry);
+}
+
+/**
* path_put - put a reference to a path
* @path: path to put the reference to
*
@@ -375,6 +393,185 @@ void path_put(struct path *path)
EXPORT_SYMBOL(path_put);
/**
+ * path_put_long - put a long reference to a path
+ * @path: path to put the reference to
+ *
+ * Given a path decrement the reference count to the dentry and the vfsmount.
+ */
+void path_put_long(struct path *path)
+{
+ dput(path->dentry);
+ mntput_long(path->mnt);
+}
+
+/**
+ * nameidata_drop_rcu - drop this nameidata out of rcu-walk
+ * @nd: nameidata pathwalk data to drop
+ * Returns: 0 on success, -ECHILD on failure
+ *
+ * Path walking has 2 modes, rcu-walk and ref-walk (see
+ * Documentation/filesystems/path-lookup.txt). __drop_rcu* functions attempt
+ * to drop out of rcu-walk mode and take normal reference counts on dentries
+ * and vfsmounts to transition to rcu-walk mode. __drop_rcu* functions take
+ * refcounts at the last known good point before rcu-walk got stuck, so
+ * ref-walk may continue from there. If this is not successful (eg. a seqcount
+ * has changed), then failure is returned and path walk restarts from the
+ * beginning in ref-walk mode.
+ *
+ * nameidata_drop_rcu attempts to drop the current nd->path and nd->root into
+ * ref-walk. Must be called from rcu-walk context.
+ */
+static int nameidata_drop_rcu(struct nameidata *nd)
+{
+ struct fs_struct *fs = current->fs;
+ struct dentry *dentry = nd->path.dentry;
+
+ BUG_ON(!(nd->flags & LOOKUP_RCU));
+ if (nd->root.mnt) {
+ spin_lock(&fs->lock);
+ if (nd->root.mnt != fs->root.mnt ||
+ nd->root.dentry != fs->root.dentry)
+ goto err_root;
+ }
+ spin_lock(&dentry->d_lock);
+ if (!__d_rcu_to_refcount(dentry, nd->seq))
+ goto err;
+ BUG_ON(nd->inode != dentry->d_inode);
+ spin_unlock(&dentry->d_lock);
+ if (nd->root.mnt) {
+ path_get(&nd->root);
+ spin_unlock(&fs->lock);
+ }
+ mntget(nd->path.mnt);
+
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+ nd->flags &= ~LOOKUP_RCU;
+ return 0;
+err:
+ spin_unlock(&dentry->d_lock);
+err_root:
+ if (nd->root.mnt)
+ spin_unlock(&fs->lock);
+ return -ECHILD;
+}
+
+/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
+static inline int nameidata_drop_rcu_maybe(struct nameidata *nd)
+{
+ if (nd->flags & LOOKUP_RCU)
+ return nameidata_drop_rcu(nd);
+ return 0;
+}
+
+/**
+ * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk
+ * @nd: nameidata pathwalk data to drop
+ * @dentry: dentry to drop
+ * Returns: 0 on success, -ECHILD on failure
+ *
+ * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root,
+ * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on
+ * @nd. Must be called from rcu-walk context.
+ */
+static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry)
+{
+ struct fs_struct *fs = current->fs;
+ struct dentry *parent = nd->path.dentry;
+
+ BUG_ON(!(nd->flags & LOOKUP_RCU));
+ if (nd->root.mnt) {
+ spin_lock(&fs->lock);
+ if (nd->root.mnt != fs->root.mnt ||
+ nd->root.dentry != fs->root.dentry)
+ goto err_root;
+ }
+ spin_lock(&parent->d_lock);
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+ if (!__d_rcu_to_refcount(dentry, nd->seq))
+ goto err;
+ /*
+ * If the sequence check on the child dentry passed, then the child has
+ * not been removed from its parent. This means the parent dentry must
+ * be valid and able to take a reference at this point.
+ */
+ BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
+ BUG_ON(!parent->d_count);
+ parent->d_count++;
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&parent->d_lock);
+ if (nd->root.mnt) {
+ path_get(&nd->root);
+ spin_unlock(&fs->lock);
+ }
+ mntget(nd->path.mnt);
+
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+ nd->flags &= ~LOOKUP_RCU;
+ return 0;
+err:
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&parent->d_lock);
+err_root:
+ if (nd->root.mnt)
+ spin_unlock(&fs->lock);
+ return -ECHILD;
+}
+
+/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
+static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
+{
+ if (nd->flags & LOOKUP_RCU)
+ return nameidata_dentry_drop_rcu(nd, dentry);
+ return 0;
+}
+
+/**
+ * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk
+ * @nd: nameidata pathwalk data to drop
+ * Returns: 0 on success, -ECHILD on failure
+ *
+ * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk.
+ * nd->path should be the final element of the lookup, so nd->root is discarded.
+ * Must be called from rcu-walk context.
+ */
+static int nameidata_drop_rcu_last(struct nameidata *nd)
+{
+ struct dentry *dentry = nd->path.dentry;
+
+ BUG_ON(!(nd->flags & LOOKUP_RCU));
+ nd->flags &= ~LOOKUP_RCU;
+ nd->root.mnt = NULL;
+ spin_lock(&dentry->d_lock);
+ if (!__d_rcu_to_refcount(dentry, nd->seq))
+ goto err_unlock;
+ BUG_ON(nd->inode != dentry->d_inode);
+ spin_unlock(&dentry->d_lock);
+
+ mntget(nd->path.mnt);
+
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+
+ return 0;
+
+err_unlock:
+ spin_unlock(&dentry->d_lock);
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+ return -ECHILD;
+}
+
+/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
+static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
+{
+ if (likely(nd->flags & LOOKUP_RCU))
+ return nameidata_drop_rcu_last(nd);
+ return 0;
+}
+
+/**
* release_open_intent - free up open intent resources
* @nd: pointer to nameidata
*/
@@ -386,10 +583,26 @@ void release_open_intent(struct nameidata *nd)
fput(nd->intent.open.file);
}
+static int d_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ int status;
+
+ status = dentry->d_op->d_revalidate(dentry, nd);
+ if (status == -ECHILD) {
+ if (nameidata_dentry_drop_rcu(nd, dentry))
+ return status;
+ status = dentry->d_op->d_revalidate(dentry, nd);
+ }
+
+ return status;
+}
+
static inline struct dentry *
do_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- int status = dentry->d_op->d_revalidate(dentry, nd);
+ int status;
+
+ status = d_revalidate(dentry, nd);
if (unlikely(status <= 0)) {
/*
* The dentry failed validation.
@@ -397,19 +610,36 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
* the dentry otherwise d_revalidate is asking us
* to return a fail status.
*/
- if (!status) {
+ if (status < 0) {
+ /* If we're in rcu-walk, we don't have a ref */
+ if (!(nd->flags & LOOKUP_RCU))
+ dput(dentry);
+ dentry = ERR_PTR(status);
+
+ } else {
+ /* Don't d_invalidate in rcu-walk mode */
+ if (nameidata_dentry_drop_rcu_maybe(nd, dentry))
+ return ERR_PTR(-ECHILD);
if (!d_invalidate(dentry)) {
dput(dentry);
dentry = NULL;
}
- } else {
- dput(dentry);
- dentry = ERR_PTR(status);
}
}
return dentry;
}
+static inline int need_reval_dot(struct dentry *dentry)
+{
+ if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
+ return 0;
+
+ if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
+ return 0;
+
+ return 1;
+}
+
/*
* force_reval_path - force revalidation of a dentry
*
@@ -433,13 +663,12 @@ force_reval_path(struct path *path, struct nameidata *nd)
/*
* only check on filesystems where it's possible for the dentry to
- * become stale. It's assumed that if this flag is set then the
- * d_revalidate op will also be defined.
+ * become stale.
*/
- if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))
+ if (!need_reval_dot(dentry))
return 0;
- status = dentry->d_op->d_revalidate(dentry, nd);
+ status = d_revalidate(dentry, nd);
if (status > 0)
return 0;
@@ -459,26 +688,27 @@ force_reval_path(struct path *path, struct nameidata *nd)
* short-cut DAC fails, then call ->permission() to do more
* complete permission check.
*/
-static int exec_permission(struct inode *inode)
+static inline int exec_permission(struct inode *inode, unsigned int flags)
{
int ret;
if (inode->i_op->permission) {
- ret = inode->i_op->permission(inode, MAY_EXEC);
- if (!ret)
- goto ok;
- return ret;
+ ret = inode->i_op->permission(inode, MAY_EXEC, flags);
+ } else {
+ ret = acl_permission_check(inode, MAY_EXEC, flags,
+ inode->i_op->check_acl);
}
- ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl);
- if (!ret)
+ if (likely(!ret))
goto ok;
+ if (ret == -ECHILD)
+ return ret;
if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))
goto ok;
return ret;
ok:
- return security_inode_permission(inode, MAY_EXEC);
+ return security_inode_exec_permission(inode, flags);
}
static __always_inline void set_root(struct nameidata *nd)
@@ -489,8 +719,23 @@ static __always_inline void set_root(struct nameidata *nd)
static int link_path_walk(const char *, struct nameidata *);
+static __always_inline void set_root_rcu(struct nameidata *nd)
+{
+ if (!nd->root.mnt) {
+ struct fs_struct *fs = current->fs;
+ unsigned seq;
+
+ do {
+ seq = read_seqcount_begin(&fs->seq);
+ nd->root = fs->root;
+ } while (read_seqcount_retry(&fs->seq, seq));
+ }
+}
+
static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
{
+ int ret;
+
if (IS_ERR(link))
goto fail;
@@ -500,8 +745,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
nd->path = nd->root;
path_get(&nd->root);
}
+ nd->inode = nd->path.dentry->d_inode;
- return link_path_walk(link, nd);
+ ret = link_path_walk(link, nd);
+ return ret;
fail:
path_put(&nd->path);
return PTR_ERR(link);
@@ -516,11 +763,12 @@ static void path_put_conditional(struct path *path, struct nameidata *nd)
static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
{
- dput(nd->path.dentry);
- if (nd->path.mnt != path->mnt) {
- mntput(nd->path.mnt);
- nd->path.mnt = path->mnt;
+ if (!(nd->flags & LOOKUP_RCU)) {
+ dput(nd->path.dentry);
+ if (nd->path.mnt != path->mnt)
+ mntput(nd->path.mnt);
}
+ nd->path.mnt = path->mnt;
nd->path.dentry = path->dentry;
}
@@ -535,9 +783,11 @@ __do_follow_link(struct path *path, struct nameidata *nd, void **p)
if (path->mnt != nd->path.mnt) {
path_to_nameidata(path, nd);
+ nd->inode = nd->path.dentry->d_inode;
dget(dentry);
}
mntget(path->mnt);
+
nd->last_type = LAST_BIND;
*p = dentry->d_inode->i_op->follow_link(dentry, nd);
error = PTR_ERR(*p);
@@ -591,6 +841,20 @@ loop:
return err;
}
+static int follow_up_rcu(struct path *path)
+{
+ struct vfsmount *parent;
+ struct dentry *mountpoint;
+
+ parent = path->mnt->mnt_parent;
+ if (parent == path->mnt)
+ return 0;
+ mountpoint = path->mnt->mnt_mountpoint;
+ path->dentry = mountpoint;
+ path->mnt = parent;
+ return 1;
+}
+
int follow_up(struct path *path)
{
struct vfsmount *parent;
@@ -612,9 +876,24 @@ int follow_up(struct path *path)
return 1;
}
-/* no need for dcache_lock, as serialization is taken care in
- * namespace.c
+/*
+ * serialization is taken care of in namespace.c
*/
+static void __follow_mount_rcu(struct nameidata *nd, struct path *path,
+ struct inode **inode)
+{
+ while (d_mountpoint(path->dentry)) {
+ struct vfsmount *mounted;
+ mounted = __lookup_mnt(path->mnt, path->dentry, 1);
+ if (!mounted)
+ return;
+ path->mnt = mounted;
+ path->dentry = mounted->mnt_root;
+ nd->seq = read_seqcount_begin(&path->dentry->d_seq);
+ *inode = path->dentry->d_inode;
+ }
+}
+
static int __follow_mount(struct path *path)
{
int res = 0;
@@ -645,9 +924,6 @@ static void follow_mount(struct path *path)
}
}
-/* no need for dcache_lock, as serialization is taken care in
- * namespace.c
- */
int follow_down(struct path *path)
{
struct vfsmount *mounted;
@@ -663,7 +939,42 @@ int follow_down(struct path *path)
return 0;
}
-static __always_inline void follow_dotdot(struct nameidata *nd)
+static int follow_dotdot_rcu(struct nameidata *nd)
+{
+ struct inode *inode = nd->inode;
+
+ set_root_rcu(nd);
+
+ while(1) {
+ if (nd->path.dentry == nd->root.dentry &&
+ nd->path.mnt == nd->root.mnt) {
+ break;
+ }
+ if (nd->path.dentry != nd->path.mnt->mnt_root) {
+ struct dentry *old = nd->path.dentry;
+ struct dentry *parent = old->d_parent;
+ unsigned seq;
+
+ seq = read_seqcount_begin(&parent->d_seq);
+ if (read_seqcount_retry(&old->d_seq, nd->seq))
+ return -ECHILD;
+ inode = parent->d_inode;
+ nd->path.dentry = parent;
+ nd->seq = seq;
+ break;
+ }
+ if (!follow_up_rcu(&nd->path))
+ break;
+ nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
+ inode = nd->path.dentry->d_inode;
+ }
+ __follow_mount_rcu(nd, &nd->path, &inode);
+ nd->inode = inode;
+
+ return 0;
+}
+
+static void follow_dotdot(struct nameidata *nd)
{
set_root(nd);
@@ -684,6 +995,7 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
break;
}
follow_mount(&nd->path);
+ nd->inode = nd->path.dentry->d_inode;
}
/*
@@ -721,17 +1033,17 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent,
* It _is_ time-critical.
*/
static int do_lookup(struct nameidata *nd, struct qstr *name,
- struct path *path)
+ struct path *path, struct inode **inode)
{
struct vfsmount *mnt = nd->path.mnt;
- struct dentry *dentry, *parent;
+ struct dentry *dentry, *parent = nd->path.dentry;
struct inode *dir;
/*
* See if the low-level filesystem might want
* to use its own hash..
*/
- if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
- int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, name);
+ if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
+ int err = parent->d_op->d_hash(parent, nd->inode, name);
if (err < 0)
return err;
}
@@ -741,21 +1053,44 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
* of a false negative due to a concurrent rename, we're going to
* do the non-racy lookup, below.
*/
- dentry = __d_lookup(nd->path.dentry, name);
- if (!dentry)
- goto need_lookup;
+ if (nd->flags & LOOKUP_RCU) {
+ unsigned seq;
+
+ *inode = nd->inode;
+ dentry = __d_lookup_rcu(parent, name, &seq, inode);
+ if (!dentry) {
+ if (nameidata_drop_rcu(nd))
+ return -ECHILD;
+ goto need_lookup;
+ }
+ /* Memory barrier in read_seqcount_begin of child is enough */
+ if (__read_seqcount_retry(&parent->d_seq, nd->seq))
+ return -ECHILD;
+
+ nd->seq = seq;
+ if (dentry->d_flags & DCACHE_OP_REVALIDATE)
+ goto need_revalidate;
+ path->mnt = mnt;
+ path->dentry = dentry;
+ __follow_mount_rcu(nd, path, inode);
+ } else {
+ dentry = __d_lookup(parent, name);
+ if (!dentry)
+ goto need_lookup;
found:
- if (dentry->d_op && dentry->d_op->d_revalidate)
- goto need_revalidate;
+ if (dentry->d_flags & DCACHE_OP_REVALIDATE)
+ goto need_revalidate;
done:
- path->mnt = mnt;
- path->dentry = dentry;
- __follow_mount(path);
+ path->mnt = mnt;
+ path->dentry = dentry;
+ __follow_mount(path);
+ *inode = path->dentry->d_inode;
+ }
return 0;
need_lookup:
- parent = nd->path.dentry;
dir = parent->d_inode;
+ BUG_ON(nd->inode != dir);
mutex_lock(&dir->i_mutex);
/*
@@ -817,7 +1152,6 @@ static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
static int link_path_walk(const char *name, struct nameidata *nd)
{
struct path next;
- struct inode *inode;
int err;
unsigned int lookup_flags = nd->flags;
@@ -826,18 +1160,28 @@ static int link_path_walk(const char *name, struct nameidata *nd)
if (!*name)
goto return_reval;
- inode = nd->path.dentry->d_inode;
if (nd->depth)
lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
/* At this point we know we have a real path component. */
for(;;) {
+ struct inode *inode;
unsigned long hash;
struct qstr this;
unsigned int c;
nd->flags |= LOOKUP_CONTINUE;
- err = exec_permission(inode);
+ if (nd->flags & LOOKUP_RCU) {
+ err = exec_permission(nd->inode, IPERM_FLAG_RCU);
+ if (err == -ECHILD) {
+ if (nameidata_drop_rcu(nd))
+ return -ECHILD;
+ goto exec_again;
+ }
+ } else {
+exec_again:
+ err = exec_permission(nd->inode, 0);
+ }
if (err)
break;
@@ -868,37 +1212,44 @@ static int link_path_walk(const char *name, struct nameidata *nd)
if (this.name[0] == '.') switch (this.len) {
default:
break;
- case 2:
+ case 2:
if (this.name[1] != '.')
break;
- follow_dotdot(nd);
- inode = nd->path.dentry->d_inode;
+ if (nd->flags & LOOKUP_RCU) {
+ if (follow_dotdot_rcu(nd))
+ return -ECHILD;
+ } else
+ follow_dotdot(nd);
/* fallthrough */
case 1:
continue;
}
/* This does the actual lookups.. */
- err = do_lookup(nd, &this, &next);
+ err = do_lookup(nd, &this, &next, &inode);
if (err)
break;
-
err = -ENOENT;
- inode = next.dentry->d_inode;
if (!inode)
goto out_dput;
if (inode->i_op->follow_link) {
+ /* We commonly drop rcu-walk here */
+ if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
+ return -ECHILD;
+ BUG_ON(inode != next.dentry->d_inode);
err = do_follow_link(&next, nd);
if (err)
goto return_err;
+ nd->inode = nd->path.dentry->d_inode;
err = -ENOENT;
- inode = nd->path.dentry->d_inode;
- if (!inode)
+ if (!nd->inode)
break;
- } else
+ } else {
path_to_nameidata(&next, nd);
+ nd->inode = inode;
+ }
err = -ENOTDIR;
- if (!inode->i_op->lookup)
+ if (!nd->inode->i_op->lookup)
break;
continue;
/* here ends the main loop */
@@ -913,32 +1264,39 @@ last_component:
if (this.name[0] == '.') switch (this.len) {
default:
break;
- case 2:
+ case 2:
if (this.name[1] != '.')
break;
- follow_dotdot(nd);
- inode = nd->path.dentry->d_inode;
+ if (nd->flags & LOOKUP_RCU) {
+ if (follow_dotdot_rcu(nd))
+ return -ECHILD;
+ } else
+ follow_dotdot(nd);
/* fallthrough */
case 1:
goto return_reval;
}
- err = do_lookup(nd, &this, &next);
+ err = do_lookup(nd, &this, &next, &inode);
if (err)
break;
- inode = next.dentry->d_inode;
if (follow_on_final(inode, lookup_flags)) {
+ if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
+ return -ECHILD;
+ BUG_ON(inode != next.dentry->d_inode);
err = do_follow_link(&next, nd);
if (err)
goto return_err;
- inode = nd->path.dentry->d_inode;
- } else
+ nd->inode = nd->path.dentry->d_inode;
+ } else {
path_to_nameidata(&next, nd);
+ nd->inode = inode;
+ }
err = -ENOENT;
- if (!inode)
+ if (!nd->inode)
break;
if (lookup_flags & LOOKUP_DIRECTORY) {
err = -ENOTDIR;
- if (!inode->i_op->lookup)
+ if (!nd->inode->i_op->lookup)
break;
}
goto return_base;
@@ -958,25 +1316,43 @@ return_reval:
* We bypassed the ordinary revalidation routines.
* We may need to check the cached dentry for staleness.
*/
- if (nd->path.dentry && nd->path.dentry->d_sb &&
- (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
- err = -ESTALE;
+ if (need_reval_dot(nd->path.dentry)) {
/* Note: we do not d_invalidate() */
- if (!nd->path.dentry->d_op->d_revalidate(
- nd->path.dentry, nd))
+ err = d_revalidate(nd->path.dentry, nd);
+ if (!err)
+ err = -ESTALE;
+ if (err < 0)
break;
}
return_base:
+ if (nameidata_drop_rcu_last_maybe(nd))
+ return -ECHILD;
return 0;
out_dput:
- path_put_conditional(&next, nd);
+ if (!(nd->flags & LOOKUP_RCU))
+ path_put_conditional(&next, nd);
break;
}
- path_put(&nd->path);
+ if (!(nd->flags & LOOKUP_RCU))
+ path_put(&nd->path);
return_err:
return err;
}
+static inline int path_walk_rcu(const char *name, struct nameidata *nd)
+{
+ current->total_link_count = 0;
+
+ return link_path_walk(name, nd);
+}
+
+static inline int path_walk_simple(const char *name, struct nameidata *nd)
+{
+ current->total_link_count = 0;
+
+ return link_path_walk(name, nd);
+}
+
static int path_walk(const char *name, struct nameidata *nd)
{
struct path save = nd->path;
@@ -1002,6 +1378,93 @@ static int path_walk(const char *name, struct nameidata *nd)
return result;
}
+static void path_finish_rcu(struct nameidata *nd)
+{
+ if (nd->flags & LOOKUP_RCU) {
+ /* RCU dangling. Cancel it. */
+ nd->flags &= ~LOOKUP_RCU;
+ nd->root.mnt = NULL;
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+ }
+ if (nd->file)
+ fput(nd->file);
+}
+
+static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
+{
+ int retval = 0;
+ int fput_needed;
+ struct file *file;
+
+ nd->last_type = LAST_ROOT; /* if there are only slashes... */
+ nd->flags = flags | LOOKUP_RCU;
+ nd->depth = 0;
+ nd->root.mnt = NULL;
+ nd->file = NULL;
+
+ if (*name=='/') {
+ struct fs_struct *fs = current->fs;
+ unsigned seq;
+
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+
+ do {
+ seq = read_seqcount_begin(&fs->seq);
+ nd->root = fs->root;
+ nd->path = nd->root;
+ nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ } while (read_seqcount_retry(&fs->seq, seq));
+
+ } else if (dfd == AT_FDCWD) {
+ struct fs_struct *fs = current->fs;
+ unsigned seq;
+
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+
+ do {
+ seq = read_seqcount_begin(&fs->seq);
+ nd->path = fs->pwd;
+ nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ } while (read_seqcount_retry(&fs->seq, seq));
+
+ } else {
+ struct dentry *dentry;
+
+ file = fget_light(dfd, &fput_needed);
+ retval = -EBADF;
+ if (!file)
+ goto out_fail;
+
+ dentry = file->f_path.dentry;
+
+ retval = -ENOTDIR;
+ if (!S_ISDIR(dentry->d_inode->i_mode))
+ goto fput_fail;
+
+ retval = file_permission(file, MAY_EXEC);
+ if (retval)
+ goto fput_fail;
+
+ nd->path = file->f_path;
+ if (fput_needed)
+ nd->file = file;
+
+ nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+ }
+ nd->inode = nd->path.dentry->d_inode;
+ return 0;
+
+fput_fail:
+ fput_light(file, fput_needed);
+out_fail:
+ return retval;
+}
+
static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
{
int retval = 0;
@@ -1042,6 +1505,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct namei
fput_light(file, fput_needed);
}
+ nd->inode = nd->path.dentry->d_inode;
return 0;
fput_fail:
@@ -1054,16 +1518,53 @@ out_fail:
static int do_path_lookup(int dfd, const char *name,
unsigned int flags, struct nameidata *nd)
{
- int retval = path_init(dfd, name, flags, nd);
- if (!retval)
- retval = path_walk(name, nd);
- if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
- nd->path.dentry->d_inode))
- audit_inode(name, nd->path.dentry);
+ int retval;
+
+ /*
+ * Path walking is largely split up into 2 different synchronisation
+ * schemes, rcu-walk and ref-walk (explained in
+ * Documentation/filesystems/path-lookup.txt). These share much of the
+ * path walk code, but some things particularly setup, cleanup, and
+ * following mounts are sufficiently divergent that functions are
+ * duplicated. Typically there is a function foo(), and its RCU
+ * analogue, foo_rcu().
+ *
+ * -ECHILD is the error number of choice (just to avoid clashes) that
+ * is returned if some aspect of an rcu-walk fails. Such an error must
+ * be handled by restarting a traditional ref-walk (which will always
+ * be able to complete).
+ */
+ retval = path_init_rcu(dfd, name, flags, nd);
+ if (unlikely(retval))
+ return retval;
+ retval = path_walk_rcu(name, nd);
+ path_finish_rcu(nd);
if (nd->root.mnt) {
path_put(&nd->root);
nd->root.mnt = NULL;
}
+
+ if (unlikely(retval == -ECHILD || retval == -ESTALE)) {
+ /* slower, locked walk */
+ if (retval == -ESTALE)
+ flags |= LOOKUP_REVAL;
+ retval = path_init(dfd, name, flags, nd);
+ if (unlikely(retval))
+ return retval;
+ retval = path_walk(name, nd);
+ if (nd->root.mnt) {
+ path_put(&nd->root);
+ nd->root.mnt = NULL;
+ }
+ }
+
+ if (likely(!retval)) {
+ if (unlikely(!audit_dummy_context())) {
+ if (nd->path.dentry && nd->inode)
+ audit_inode(name, nd->path.dentry);
+ }
+ }
+
return retval;
}
@@ -1106,10 +1607,11 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
path_get(&nd->path);
nd->root = nd->path;
path_get(&nd->root);
+ nd->inode = nd->path.dentry->d_inode;
retval = path_walk(name, nd);
if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
- nd->path.dentry->d_inode))
+ nd->inode))
audit_inode(name, nd->path.dentry);
path_put(&nd->root);
@@ -1125,7 +1627,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
struct dentry *dentry;
int err;
- err = exec_permission(inode);
+ err = exec_permission(inode, 0);
if (err)
return ERR_PTR(err);
@@ -1133,8 +1635,8 @@ static struct dentry *__lookup_hash(struct qstr *name,
* See if the low-level filesystem might want
* to use its own hash..
*/
- if (base->d_op && base->d_op->d_hash) {
- err = base->d_op->d_hash(base, name);
+ if (base->d_flags & DCACHE_OP_HASH) {
+ err = base->d_op->d_hash(base, inode, name);
dentry = ERR_PTR(err);
if (err < 0)
goto out;
@@ -1147,7 +1649,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
*/
dentry = d_lookup(base, name);
- if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
+ if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE))
dentry = do_revalidate(dentry, nd);
if (!dentry)
@@ -1490,6 +1992,7 @@ out_unlock:
mutex_unlock(&dir->d_inode->i_mutex);
dput(nd->path.dentry);
nd->path.dentry = path->dentry;
+
if (error)
return error;
/* Don't check for write permission, don't truncate */
@@ -1574,6 +2077,7 @@ static struct file *finish_open(struct nameidata *nd,
*/
if (will_truncate)
mnt_drop_write(nd->path.mnt);
+ path_put(&nd->path);
return filp;
exit:
@@ -1583,6 +2087,9 @@ exit:
return ERR_PTR(error);
}
+/*
+ * Handle O_CREAT case for do_filp_open
+ */
static struct file *do_last(struct nameidata *nd, struct path *path,
int open_flag, int acc_mode,
int mode, const char *pathname)
@@ -1596,50 +2103,25 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
follow_dotdot(nd);
dir = nd->path.dentry;
case LAST_DOT:
- if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) {
- if (!dir->d_op->d_revalidate(dir, nd)) {
+ if (need_reval_dot(dir)) {
+ error = d_revalidate(nd->path.dentry, nd);
+ if (!error)
error = -ESTALE;
+ if (error < 0)
goto exit;
- }
}
/* fallthrough */
case LAST_ROOT:
- if (open_flag & O_CREAT)
- goto exit;
- /* fallthrough */
+ goto exit;
case LAST_BIND:
audit_inode(pathname, dir);
goto ok;
}
/* trailing slashes? */
- if (nd->last.name[nd->last.len]) {
- if (open_flag & O_CREAT)
- goto exit;
- nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;
- }
-
- /* just plain open? */
- if (!(open_flag & O_CREAT)) {
- error = do_lookup(nd, &nd->last, path);
- if (error)
- goto exit;
- error = -ENOENT;
- if (!path->dentry->d_inode)
- goto exit_dput;
- if (path->dentry->d_inode->i_op->follow_link)
- return NULL;
- error = -ENOTDIR;
- if (nd->flags & LOOKUP_DIRECTORY) {
- if (!path->dentry->d_inode->i_op->lookup)
- goto exit_dput;
- }
- path_to_nameidata(path, nd);
- audit_inode(pathname, nd->path.dentry);
- goto ok;
- }
+ if (nd->last.name[nd->last.len])
+ goto exit;
- /* OK, it's O_CREAT */
mutex_lock(&dir->d_inode->i_mutex);
path->dentry = lookup_hash(nd);
@@ -1675,6 +2157,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
}
filp = nameidata_to_filp(nd);
mnt_drop_write(nd->path.mnt);
+ path_put(&nd->path);
if (!IS_ERR(filp)) {
error = ima_file_check(filp, acc_mode);
if (error) {
@@ -1709,8 +2192,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
return NULL;
path_to_nameidata(path, nd);
+ nd->inode = path->dentry->d_inode;
error = -EISDIR;
- if (S_ISDIR(path->dentry->d_inode->i_mode))
+ if (S_ISDIR(nd->inode->i_mode))
goto exit;
ok:
filp = finish_open(nd, open_flag, acc_mode);
@@ -1741,11 +2225,14 @@ struct file *do_filp_open(int dfd, const char *pathname,
struct path path;
int count = 0;
int flag = open_to_namei_flags(open_flag);
- int force_reval = 0;
+ int flags;
if (!(open_flag & O_CREAT))
mode = 0;
+ /* Must never be set by userspace */
+ open_flag &= ~FMODE_NONOTIFY;
+
/*
* O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
* check for O_DSYNC if the need any syncing at all we enforce it's
@@ -1767,54 +2254,84 @@ struct file *do_filp_open(int dfd, const char *pathname,
if (open_flag & O_APPEND)
acc_mode |= MAY_APPEND;
- /* find the parent */
-reval:
- error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
+ flags = LOOKUP_OPEN;
+ if (open_flag & O_CREAT) {
+ flags |= LOOKUP_CREATE;
+ if (open_flag & O_EXCL)
+ flags |= LOOKUP_EXCL;
+ }
+ if (open_flag & O_DIRECTORY)
+ flags |= LOOKUP_DIRECTORY;
+ if (!(open_flag & O_NOFOLLOW))
+ flags |= LOOKUP_FOLLOW;
+
+ filp = get_empty_filp();
+ if (!filp)
+ return ERR_PTR(-ENFILE);
+
+ filp->f_flags = open_flag;
+ nd.intent.open.file = filp;
+ nd.intent.open.flags = flag;
+ nd.intent.open.create_mode = mode;
+
+ if (open_flag & O_CREAT)
+ goto creat;
+
+ /* !O_CREAT, simple open */
+ error = do_path_lookup(dfd, pathname, flags, &nd);
+ if (unlikely(error))
+ goto out_filp;
+ error = -ELOOP;
+ if (!(nd.flags & LOOKUP_FOLLOW)) {
+ if (nd.inode->i_op->follow_link)
+ goto out_path;
+ }
+ error = -ENOTDIR;
+ if (nd.flags & LOOKUP_DIRECTORY) {
+ if (!nd.inode->i_op->lookup)
+ goto out_path;
+ }
+ audit_inode(pathname, nd.path.dentry);
+ filp = finish_open(&nd, open_flag, acc_mode);
+ return filp;
+
+creat:
+ /* OK, have to create the file. Find the parent. */
+ error = path_init_rcu(dfd, pathname,
+ LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
if (error)
- return ERR_PTR(error);
- if (force_reval)
- nd.flags |= LOOKUP_REVAL;
+ goto out_filp;
+ error = path_walk_rcu(pathname, &nd);
+ path_finish_rcu(&nd);
+ if (unlikely(error == -ECHILD || error == -ESTALE)) {
+ /* slower, locked walk */
+ if (error == -ESTALE) {
+reval:
+ flags |= LOOKUP_REVAL;
+ }
+ error = path_init(dfd, pathname,
+ LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
+ if (error)
+ goto out_filp;
- current->total_link_count = 0;
- error = link_path_walk(pathname, &nd);
- if (error) {
- filp = ERR_PTR(error);
- goto out;
+ error = path_walk_simple(pathname, &nd);
}
- if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT))
+ if (unlikely(error))
+ goto out_filp;
+ if (unlikely(!audit_dummy_context()))
audit_inode(pathname, nd.path.dentry);
/*
* We have the parent and last component.
*/
-
- error = -ENFILE;
- filp = get_empty_filp();
- if (filp == NULL)
- goto exit_parent;
- nd.intent.open.file = filp;
- filp->f_flags = open_flag;
- nd.intent.open.flags = flag;
- nd.intent.open.create_mode = mode;
- nd.flags &= ~LOOKUP_PARENT;
- nd.flags |= LOOKUP_OPEN;
- if (open_flag & O_CREAT) {
- nd.flags |= LOOKUP_CREATE;
- if (open_flag & O_EXCL)
- nd.flags |= LOOKUP_EXCL;
- }
- if (open_flag & O_DIRECTORY)
- nd.flags |= LOOKUP_DIRECTORY;
- if (!(open_flag & O_NOFOLLOW))
- nd.flags |= LOOKUP_FOLLOW;
+ nd.flags = flags;
filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
while (unlikely(!filp)) { /* trailing symlink */
struct path holder;
- struct inode *inode = path.dentry->d_inode;
void *cookie;
error = -ELOOP;
/* S_ISDIR part is a temporary automount kludge */
- if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode))
+ if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(nd.inode->i_mode))
goto exit_dput;
if (count++ == 32)
goto exit_dput;
@@ -1835,36 +2352,33 @@ reval:
goto exit_dput;
error = __do_follow_link(&path, &nd, &cookie);
if (unlikely(error)) {
+ if (!IS_ERR(cookie) && nd.inode->i_op->put_link)
+ nd.inode->i_op->put_link(path.dentry, &nd, cookie);
/* nd.path had been dropped */
- if (!IS_ERR(cookie) && inode->i_op->put_link)
- inode->i_op->put_link(path.dentry, &nd, cookie);
- path_put(&path);
- release_open_intent(&nd);
- filp = ERR_PTR(error);
- goto out;
+ nd.path = path;
+ goto out_path;
}
holder = path;
nd.flags &= ~LOOKUP_PARENT;
filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
- if (inode->i_op->put_link)
- inode->i_op->put_link(holder.dentry, &nd, cookie);
+ if (nd.inode->i_op->put_link)
+ nd.inode->i_op->put_link(holder.dentry, &nd, cookie);
path_put(&holder);
}
out:
if (nd.root.mnt)
path_put(&nd.root);
- if (filp == ERR_PTR(-ESTALE) && !force_reval) {
- force_reval = 1;
+ if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
goto reval;
- }
return filp;
exit_dput:
path_put_conditional(&path, &nd);
+out_path:
+ path_put(&nd.path);
+out_filp:
if (!IS_ERR(nd.intent.open.file))
release_open_intent(&nd);
-exit_parent:
- path_put(&nd.path);
filp = ERR_PTR(error);
goto out;
}
@@ -2125,12 +2639,10 @@ void dentry_unhash(struct dentry *dentry)
{
dget(dentry);
shrink_dcache_parent(dentry);
- spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count) == 2)
+ if (dentry->d_count == 2)
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
}
int vfs_rmdir(struct inode *dir, struct dentry *dentry)
diff --git a/fs/namespace.c b/fs/namespace.c
index 8a415c9..3ddfd90 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -13,7 +13,6 @@
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/percpu.h>
-#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/acct.h>
@@ -139,6 +138,64 @@ void mnt_release_group_id(struct vfsmount *mnt)
mnt->mnt_group_id = 0;
}
+/*
+ * vfsmount lock must be held for read
+ */
+static inline void mnt_add_count(struct vfsmount *mnt, int n)
+{
+#ifdef CONFIG_SMP
+ this_cpu_add(mnt->mnt_pcp->mnt_count, n);
+#else
+ preempt_disable();
+ mnt->mnt_count += n;
+ preempt_enable();
+#endif
+}
+
+static inline void mnt_set_count(struct vfsmount *mnt, int n)
+{
+#ifdef CONFIG_SMP
+ this_cpu_write(mnt->mnt_pcp->mnt_count, n);
+#else
+ mnt->mnt_count = n;
+#endif
+}
+
+/*
+ * vfsmount lock must be held for read
+ */
+static inline void mnt_inc_count(struct vfsmount *mnt)
+{
+ mnt_add_count(mnt, 1);
+}
+
+/*
+ * vfsmount lock must be held for read
+ */
+static inline void mnt_dec_count(struct vfsmount *mnt)
+{
+ mnt_add_count(mnt, -1);
+}
+
+/*
+ * vfsmount lock must be held for write
+ */
+unsigned int mnt_get_count(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+ unsigned int count = atomic_read(&mnt->mnt_longrefs);
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
+ }
+
+ return count;
+#else
+ return mnt->mnt_count;
+#endif
+}
+
struct vfsmount *alloc_vfsmnt(const char *name)
{
struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -155,7 +212,17 @@ struct vfsmount *alloc_vfsmnt(const char *name)
goto out_free_id;
}
- atomic_set(&mnt->mnt_count, 1);
+#ifdef CONFIG_SMP
+ mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
+ if (!mnt->mnt_pcp)
+ goto out_free_devname;
+
+ atomic_set(&mnt->mnt_longrefs, 1);
+#else
+ mnt->mnt_count = 1;
+ mnt->mnt_writers = 0;
+#endif
+
INIT_LIST_HEAD(&mnt->mnt_hash);
INIT_LIST_HEAD(&mnt->mnt_child);
INIT_LIST_HEAD(&mnt->mnt_mounts);
@@ -167,13 +234,6 @@ struct vfsmount *alloc_vfsmnt(const char *name)
#ifdef CONFIG_FSNOTIFY
INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
#endif
-#ifdef CONFIG_SMP
- mnt->mnt_writers = alloc_percpu(int);
- if (!mnt->mnt_writers)
- goto out_free_devname;
-#else
- mnt->mnt_writers = 0;
-#endif
}
return mnt;
@@ -217,32 +277,32 @@ int __mnt_is_readonly(struct vfsmount *mnt)
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);
-static inline void inc_mnt_writers(struct vfsmount *mnt)
+static inline void mnt_inc_writers(struct vfsmount *mnt)
{
#ifdef CONFIG_SMP
- (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++;
+ this_cpu_inc(mnt->mnt_pcp->mnt_writers);
#else
mnt->mnt_writers++;
#endif
}
-static inline void dec_mnt_writers(struct vfsmount *mnt)
+static inline void mnt_dec_writers(struct vfsmount *mnt)
{
#ifdef CONFIG_SMP
- (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--;
+ this_cpu_dec(mnt->mnt_pcp->mnt_writers);
#else
mnt->mnt_writers--;
#endif
}
-static unsigned int count_mnt_writers(struct vfsmount *mnt)
+static unsigned int mnt_get_writers(struct vfsmount *mnt)
{
#ifdef CONFIG_SMP
unsigned int count = 0;
int cpu;
for_each_possible_cpu(cpu) {
- count += *per_cpu_ptr(mnt->mnt_writers, cpu);
+ count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
}
return count;
@@ -274,9 +334,9 @@ int mnt_want_write(struct vfsmount *mnt)
int ret = 0;
preempt_disable();
- inc_mnt_writers(mnt);
+ mnt_inc_writers(mnt);
/*
- * The store to inc_mnt_writers must be visible before we pass
+ * The store to mnt_inc_writers must be visible before we pass
* MNT_WRITE_HOLD loop below, so that the slowpath can see our
* incremented count after it has set MNT_WRITE_HOLD.
*/
@@ -290,7 +350,7 @@ int mnt_want_write(struct vfsmount *mnt)
*/
smp_rmb();
if (__mnt_is_readonly(mnt)) {
- dec_mnt_writers(mnt);
+ mnt_dec_writers(mnt);
ret = -EROFS;
goto out;
}
@@ -318,7 +378,7 @@ int mnt_clone_write(struct vfsmount *mnt)
if (__mnt_is_readonly(mnt))
return -EROFS;
preempt_disable();
- inc_mnt_writers(mnt);
+ mnt_inc_writers(mnt);
preempt_enable();
return 0;
}
@@ -352,7 +412,7 @@ EXPORT_SYMBOL_GPL(mnt_want_write_file);
void mnt_drop_write(struct vfsmount *mnt)
{
preempt_disable();
- dec_mnt_writers(mnt);
+ mnt_dec_writers(mnt);
preempt_enable();
}
EXPORT_SYMBOL_GPL(mnt_drop_write);
@@ -385,7 +445,7 @@ static int mnt_make_readonly(struct vfsmount *mnt)
* MNT_WRITE_HOLD, so it can't be decremented by another CPU while
* we're counting up here.
*/
- if (count_mnt_writers(mnt) > 0)
+ if (mnt_get_writers(mnt) > 0)
ret = -EBUSY;
else
mnt->mnt_flags |= MNT_READONLY;
@@ -419,7 +479,7 @@ void free_vfsmnt(struct vfsmount *mnt)
kfree(mnt->mnt_devname);
mnt_free_id(mnt);
#ifdef CONFIG_SMP
- free_percpu(mnt->mnt_writers);
+ free_percpu(mnt->mnt_pcp);
#endif
kmem_cache_free(mnt_cache, mnt);
}
@@ -493,6 +553,27 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
}
/*
+ * Clear dentry's mounted state if it has no remaining mounts.
+ * vfsmount_lock must be held for write.
+ */
+static void dentry_reset_mounted(struct vfsmount *mnt, struct dentry *dentry)
+{
+ unsigned u;
+
+ for (u = 0; u < HASH_SIZE; u++) {
+ struct vfsmount *p;
+
+ list_for_each_entry(p, &mount_hashtable[u], mnt_hash) {
+ if (p->mnt_mountpoint == dentry)
+ return;
+ }
+ }
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags &= ~DCACHE_MOUNTED;
+ spin_unlock(&dentry->d_lock);
+}
+
+/*
* vfsmount lock must be held for write
*/
static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
@@ -503,7 +584,7 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
mnt->mnt_mountpoint = mnt->mnt_root;
list_del_init(&mnt->mnt_child);
list_del_init(&mnt->mnt_hash);
- old_path->dentry->d_mounted--;
+ dentry_reset_mounted(old_path->mnt, old_path->dentry);
}
/*
@@ -514,7 +595,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
{
child_mnt->mnt_parent = mntget(mnt);
child_mnt->mnt_mountpoint = dget(dentry);
- dentry->d_mounted++;
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags |= DCACHE_MOUNTED;
+ spin_unlock(&dentry->d_lock);
}
/*
@@ -630,9 +713,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
return NULL;
}
-static inline void __mntput(struct vfsmount *mnt)
+static inline void mntfree(struct vfsmount *mnt)
{
struct super_block *sb = mnt->mnt_sb;
+
/*
* This probably indicates that somebody messed
* up a mnt_want/drop_write() pair. If this
@@ -640,38 +724,123 @@ static inline void __mntput(struct vfsmount *mnt)
* to make r/w->r/o transitions.
*/
/*
- * atomic_dec_and_lock() used to deal with ->mnt_count decrements
- * provides barriers, so count_mnt_writers() below is safe. AV
+ * The locking used to deal with mnt_count decrement provides barriers,
+ * so mnt_get_writers() below is safe.
*/
- WARN_ON(count_mnt_writers(mnt));
+ WARN_ON(mnt_get_writers(mnt));
fsnotify_vfsmount_delete(mnt);
dput(mnt->mnt_root);
free_vfsmnt(mnt);
deactivate_super(sb);
}
-void mntput_no_expire(struct vfsmount *mnt)
-{
-repeat:
- if (atomic_add_unless(&mnt->mnt_count, -1, 1))
- return;
+#ifdef CONFIG_SMP
+static inline void __mntput(struct vfsmount *mnt, int longrefs)
+{
+ if (!longrefs) {
+put_again:
+ br_read_lock(vfsmount_lock);
+ if (likely(atomic_read(&mnt->mnt_longrefs))) {
+ mnt_dec_count(mnt);
+ br_read_unlock(vfsmount_lock);
+ return;
+ }
+ br_read_unlock(vfsmount_lock);
+ } else {
+ BUG_ON(!atomic_read(&mnt->mnt_longrefs));
+ if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1))
+ return;
+ }
+
br_write_lock(vfsmount_lock);
- if (!atomic_dec_and_test(&mnt->mnt_count)) {
+ if (!longrefs)
+ mnt_dec_count(mnt);
+ else
+ atomic_dec(&mnt->mnt_longrefs);
+ if (mnt_get_count(mnt)) {
br_write_unlock(vfsmount_lock);
return;
}
- if (likely(!mnt->mnt_pinned)) {
+ if (unlikely(mnt->mnt_pinned)) {
+ mnt_add_count(mnt, mnt->mnt_pinned + 1);
+ mnt->mnt_pinned = 0;
br_write_unlock(vfsmount_lock);
- __mntput(mnt);
+ acct_auto_close_mnt(mnt);
+ goto put_again;
+ }
+ br_write_unlock(vfsmount_lock);
+ mntfree(mnt);
+}
+#else
+static inline void __mntput(struct vfsmount *mnt, int longrefs)
+{
+put_again:
+ mnt_dec_count(mnt);
+ if (likely(mnt_get_count(mnt)))
return;
+ br_write_lock(vfsmount_lock);
+ if (unlikely(mnt->mnt_pinned)) {
+ mnt_add_count(mnt, mnt->mnt_pinned + 1);
+ mnt->mnt_pinned = 0;
+ br_write_unlock(vfsmount_lock);
+ acct_auto_close_mnt(mnt);
+ goto put_again;
}
- atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
- mnt->mnt_pinned = 0;
br_write_unlock(vfsmount_lock);
- acct_auto_close_mnt(mnt);
- goto repeat;
+ mntfree(mnt);
+}
+#endif
+
+static void mntput_no_expire(struct vfsmount *mnt)
+{
+ __mntput(mnt, 0);
+}
+
+void mntput(struct vfsmount *mnt)
+{
+ if (mnt) {
+ /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
+ if (unlikely(mnt->mnt_expiry_mark))
+ mnt->mnt_expiry_mark = 0;
+ __mntput(mnt, 0);
+ }
+}
+EXPORT_SYMBOL(mntput);
+
+struct vfsmount *mntget(struct vfsmount *mnt)
+{
+ if (mnt)
+ mnt_inc_count(mnt);
+ return mnt;
}
-EXPORT_SYMBOL(mntput_no_expire);
+EXPORT_SYMBOL(mntget);
+
+void mntput_long(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+ if (mnt) {
+ /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
+ if (unlikely(mnt->mnt_expiry_mark))
+ mnt->mnt_expiry_mark = 0;
+ __mntput(mnt, 1);
+ }
+#else
+ mntput(mnt);
+#endif
+}
+EXPORT_SYMBOL(mntput_long);
+
+struct vfsmount *mntget_long(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+ if (mnt)
+ atomic_inc(&mnt->mnt_longrefs);
+ return mnt;
+#else
+ return mntget(mnt);
+#endif
+}
+EXPORT_SYMBOL(mntget_long);
void mnt_pin(struct vfsmount *mnt)
{
@@ -679,19 +848,17 @@ void mnt_pin(struct vfsmount *mnt)
mnt->mnt_pinned++;
br_write_unlock(vfsmount_lock);
}
-
EXPORT_SYMBOL(mnt_pin);
void mnt_unpin(struct vfsmount *mnt)
{
br_write_lock(vfsmount_lock);
if (mnt->mnt_pinned) {
- atomic_inc(&mnt->mnt_count);
+ mnt_inc_count(mnt);
mnt->mnt_pinned--;
}
br_write_unlock(vfsmount_lock);
}
-
EXPORT_SYMBOL(mnt_unpin);
static inline void mangle(struct seq_file *m, const char *s)
@@ -986,12 +1153,13 @@ int may_umount_tree(struct vfsmount *mnt)
int minimum_refs = 0;
struct vfsmount *p;
- br_read_lock(vfsmount_lock);
+ /* write lock needed for mnt_get_count */
+ br_write_lock(vfsmount_lock);
for (p = mnt; p; p = next_mnt(p, mnt)) {
- actual_refs += atomic_read(&p->mnt_count);
+ actual_refs += mnt_get_count(p);
minimum_refs += 2;
}
- br_read_unlock(vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
if (actual_refs > minimum_refs)
return 0;
@@ -1018,10 +1186,10 @@ int may_umount(struct vfsmount *mnt)
{
int ret = 1;
down_read(&namespace_sem);
- br_read_lock(vfsmount_lock);
+ br_write_lock(vfsmount_lock);
if (propagate_mount_busy(mnt, 2))
ret = 0;
- br_read_unlock(vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
up_read(&namespace_sem);
return ret;
}
@@ -1048,7 +1216,7 @@ void release_mounts(struct list_head *head)
dput(dentry);
mntput(m);
}
- mntput(mnt);
+ mntput_long(mnt);
}
}
@@ -1074,7 +1242,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
list_del_init(&p->mnt_child);
if (p->mnt_parent != p) {
p->mnt_parent->mnt_ghosts++;
- p->mnt_mountpoint->d_mounted--;
+ dentry_reset_mounted(p->mnt_parent, p->mnt_mountpoint);
}
change_mnt_propagation(p, MS_PRIVATE);
}
@@ -1103,8 +1271,16 @@ static int do_umount(struct vfsmount *mnt, int flags)
flags & (MNT_FORCE | MNT_DETACH))
return -EINVAL;
- if (atomic_read(&mnt->mnt_count) != 2)
+ /*
+ * probably don't strictly need the lock here if we examined
+ * all race cases, but it's a slowpath.
+ */
+ br_write_lock(vfsmount_lock);
+ if (mnt_get_count(mnt) != 2) {
+ br_write_lock(vfsmount_lock);
return -EBUSY;
+ }
+ br_write_unlock(vfsmount_lock);
if (!xchg(&mnt->mnt_expiry_mark, 1))
return -EAGAIN;
@@ -1793,7 +1969,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
unlock:
up_write(&namespace_sem);
- mntput(newmnt);
+ mntput_long(newmnt);
return err;
}
@@ -2126,11 +2302,11 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
if (fs) {
if (p == fs->root.mnt) {
rootmnt = p;
- fs->root.mnt = mntget(q);
+ fs->root.mnt = mntget_long(q);
}
if (p == fs->pwd.mnt) {
pwdmnt = p;
- fs->pwd.mnt = mntget(q);
+ fs->pwd.mnt = mntget_long(q);
}
}
p = next_mnt(p, mnt_ns->root);
@@ -2139,9 +2315,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
up_write(&namespace_sem);
if (rootmnt)
- mntput(rootmnt);
+ mntput_long(rootmnt);
if (pwdmnt)
- mntput(pwdmnt);
+ mntput_long(pwdmnt);
return new_ns;
}
@@ -2328,6 +2504,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
touch_mnt_namespace(current->nsproxy->mnt_ns);
br_write_unlock(vfsmount_lock);
chroot_fs_refs(&root, &new);
+
error = 0;
path_put(&root_parent);
path_put(&parent_path);
@@ -2354,6 +2531,7 @@ static void __init init_mount_tree(void)
mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
if (IS_ERR(mnt))
panic("Can't create rootfs");
+
ns = create_mnt_ns(mnt);
if (IS_ERR(ns))
panic("Can't allocate initial namespace");
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index aac8832e..28f136d 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -17,9 +17,9 @@
#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
+#include <linux/namei.h>
#include <asm/uaccess.h>
#include <asm/byteorder.h>
-#include <linux/smp_lock.h>
#include <linux/ncp_fs.h>
@@ -75,9 +75,12 @@ const struct inode_operations ncp_dir_inode_operations =
* Dentry operations routines
*/
static int ncp_lookup_validate(struct dentry *, struct nameidata *);
-static int ncp_hash_dentry(struct dentry *, struct qstr *);
-static int ncp_compare_dentry (struct dentry *, struct qstr *, struct qstr *);
-static int ncp_delete_dentry(struct dentry *);
+static int ncp_hash_dentry(const struct dentry *, const struct inode *,
+ struct qstr *);
+static int ncp_compare_dentry(const struct dentry *, const struct inode *,
+ const struct dentry *, const struct inode *,
+ unsigned int, const char *, const struct qstr *);
+static int ncp_delete_dentry(const struct dentry *);
static const struct dentry_operations ncp_dentry_operations =
{
@@ -114,10 +117,10 @@ static inline int ncp_preserve_entry_case(struct inode *i, __u32 nscreator)
#define ncp_preserve_case(i) (ncp_namespace(i) != NW_NS_DOS)
-static inline int ncp_case_sensitive(struct dentry *dentry)
+static inline int ncp_case_sensitive(const struct inode *i)
{
#ifdef CONFIG_NCPFS_NFS_NS
- return ncp_namespace(dentry->d_inode) == NW_NS_NFS;
+ return ncp_namespace(i) == NW_NS_NFS;
#else
return 0;
#endif /* CONFIG_NCPFS_NFS_NS */
@@ -128,14 +131,16 @@ static inline int ncp_case_sensitive(struct dentry *dentry)
* is case-sensitive.
*/
static int
-ncp_hash_dentry(struct dentry *dentry, struct qstr *this)
+ncp_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *this)
{
- if (!ncp_case_sensitive(dentry)) {
+ if (!ncp_case_sensitive(inode)) {
+ struct super_block *sb = dentry->d_sb;
struct nls_table *t;
unsigned long hash;
int i;
- t = NCP_IO_TABLE(dentry);
+ t = NCP_IO_TABLE(sb);
hash = init_name_hash();
for (i=0; i<this->len ; i++)
hash = partial_name_hash(ncp_tolower(t, this->name[i]),
@@ -146,15 +151,17 @@ ncp_hash_dentry(struct dentry *dentry, struct qstr *this)
}
static int
-ncp_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
+ncp_compare_dentry(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- if (a->len != b->len)
+ if (len != name->len)
return 1;
- if (ncp_case_sensitive(dentry))
- return strncmp(a->name, b->name, a->len);
+ if (ncp_case_sensitive(pinode))
+ return strncmp(str, name->name, len);
- return ncp_strnicmp(NCP_IO_TABLE(dentry), a->name, b->name, a->len);
+ return ncp_strnicmp(NCP_IO_TABLE(pinode->i_sb), str, name->name, len);
}
/*
@@ -163,7 +170,7 @@ ncp_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
* Closing files can be safely postponed until iput() - it's done there anyway.
*/
static int
-ncp_delete_dentry(struct dentry * dentry)
+ncp_delete_dentry(const struct dentry * dentry)
{
struct inode *inode = dentry->d_inode;
@@ -302,6 +309,9 @@ ncp_lookup_validate(struct dentry *dentry, struct nameidata *nd)
int res, val = 0, len;
__u8 __name[NCP_MAXPATHLEN + 1];
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
parent = dget_parent(dentry);
dir = parent->d_inode;
@@ -385,21 +395,21 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
}
/* If a pointer is invalid, we search the dentry. */
- spin_lock(&dcache_lock);
+ spin_lock(&parent->d_lock);
next = parent->d_subdirs.next;
while (next != &parent->d_subdirs) {
dent = list_entry(next, struct dentry, d_u.d_child);
if ((unsigned long)dent->d_fsdata == fpos) {
if (dent->d_inode)
- dget_locked(dent);
+ dget(dent);
else
dent = NULL;
- spin_unlock(&dcache_lock);
+ spin_unlock(&parent->d_lock);
goto out;
}
next = next->next;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&parent->d_lock);
return NULL;
out:
@@ -593,7 +603,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
qname.hash = full_name_hash(qname.name, qname.len);
if (dentry->d_op && dentry->d_op->d_hash)
- if (dentry->d_op->d_hash(dentry, &qname) != 0)
+ if (dentry->d_op->d_hash(dentry, dentry->d_inode, &qname) != 0)
goto end_advance;
newdent = d_lookup(dentry, &qname);
@@ -612,35 +622,12 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
shrink_dcache_parent(newdent);
/*
- * It is not as dangerous as it looks. NetWare's OS2 namespace is
- * case preserving yet case insensitive. So we update dentry's name
- * as received from server. We found dentry via d_lookup with our
- * hash, so we know that hash does not change, and so replacing name
- * should be reasonably safe.
+ * NetWare's OS2 namespace is case preserving yet case
+ * insensitive. So we update dentry's name as received from
+ * server. Parent dir's i_mutex is locked because we're in
+ * readdir.
*/
- if (qname.len == newdent->d_name.len &&
- memcmp(newdent->d_name.name, qname.name, newdent->d_name.len)) {
- struct inode *inode = newdent->d_inode;
-
- /*
- * Inside ncpfs all uses of d_name are either for debugging,
- * or on functions which acquire inode mutex (mknod, creat,
- * lookup). So grab i_mutex here, to be sure. d_path
- * uses dcache_lock when generating path, so we should too.
- * And finally d_compare is protected by dentry's d_lock, so
- * here we go.
- */
- if (inode)
- mutex_lock(&inode->i_mutex);
- spin_lock(&dcache_lock);
- spin_lock(&newdent->d_lock);
- memcpy((char *) newdent->d_name.name, qname.name,
- newdent->d_name.len);
- spin_unlock(&newdent->d_lock);
- spin_unlock(&dcache_lock);
- if (inode)
- mutex_unlock(&inode->i_mutex);
- }
+ dentry_update_name_case(newdent, &qname);
}
if (!newdent->d_inode) {
@@ -650,7 +637,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
entry->ino = iunique(dir->i_sb, 2);
inode = ncp_iget(dir->i_sb, entry);
if (inode) {
- newdent->d_op = &ncp_dentry_operations;
+ d_set_d_op(newdent, &ncp_dentry_operations);
d_instantiate(newdent, inode);
if (!hashed)
d_rehash(newdent);
@@ -658,7 +645,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
} else {
struct inode *inode = newdent->d_inode;
- mutex_lock(&inode->i_mutex);
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
ncp_update_inode2(inode, entry);
mutex_unlock(&inode->i_mutex);
}
@@ -906,7 +893,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struc
if (inode) {
ncp_new_dentry(dentry);
add_entry:
- dentry->d_op = &ncp_dentry_operations;
+ d_set_d_op(dentry, &ncp_dentry_operations);
d_add(dentry, inode);
error = 0;
}
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 6c754f7..cb50aaf 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -17,7 +17,6 @@
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
-#include <linux/smp_lock.h>
#include <linux/ncp_fs.h>
#include "ncplib_kernel.h"
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 985fabb..9b39a5d 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -26,10 +26,10 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
-#include <linux/smp_lock.h>
#include <linux/vfs.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
+#include <linux/namei.h>
#include <linux/ncp_fs.h>
@@ -59,11 +59,18 @@ static struct inode *ncp_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void ncp_destroy_inode(struct inode *inode)
+static void ncp_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode));
}
+static void ncp_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, ncp_i_callback);
+}
+
static void init_once(void *foo)
{
struct ncp_inode_info *ei = (struct ncp_inode_info *) foo;
@@ -310,7 +317,12 @@ static void ncp_stop_tasks(struct ncp_server *server) {
sk->sk_write_space = server->write_space;
release_sock(sk);
del_timer_sync(&server->timeout_tm);
- flush_scheduled_work();
+
+ flush_work_sync(&server->rcv.tq);
+ if (sk->sk_socket->type == SOCK_STREAM)
+ flush_work_sync(&server->tx.tq);
+ else
+ flush_work_sync(&server->timeout_tq);
}
static int ncp_show_options(struct seq_file *seq, struct vfsmount *mnt)
@@ -711,7 +723,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
sb->s_root = d_alloc_root(root_inode);
if (!sb->s_root)
goto out_no_root;
- sb->s_root->d_op = &ncp_root_dentry_operations;
+ d_set_d_op(sb->s_root, &ncp_root_dentry_operations);
return 0;
out_no_root:
@@ -1020,16 +1032,16 @@ out:
return result;
}
-static int ncp_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ncp_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_nodev(fs_type, flags, data, ncp_fill_super, mnt);
+ return mount_nodev(fs_type, flags, data, ncp_fill_super);
}
static struct file_system_type ncp_fs_type = {
.owner = THIS_MODULE,
.name = "ncpfs",
- .get_sb = ncp_get_sb,
+ .mount = ncp_mount,
.kill_sb = kill_anon_super,
.fs_flags = FS_BINARY_MOUNTDATA,
};
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index c2a1f9a..d40a547 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -17,7 +17,6 @@
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/highuid.h>
-#include <linux/smp_lock.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 3c57eca..1220df7 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -135,7 +135,7 @@ int ncp__vol2io(struct ncp_server *, unsigned char *, unsigned int *,
const unsigned char *, unsigned int, int);
#define NCP_ESC ':'
-#define NCP_IO_TABLE(dentry) (NCP_SERVER((dentry)->d_inode)->nls_io)
+#define NCP_IO_TABLE(sb) (NCP_SBP(sb)->nls_io)
#define ncp_tolower(t, c) nls_tolower(t, c)
#define ncp_toupper(t, c) nls_toupper(t, c)
#define ncp_strnicmp(t, s1, s2, len) \
@@ -150,15 +150,15 @@ int ncp__io2vol(unsigned char *, unsigned int *,
int ncp__vol2io(unsigned char *, unsigned int *,
const unsigned char *, unsigned int, int);
-#define NCP_IO_TABLE(dentry) NULL
+#define NCP_IO_TABLE(sb) NULL
#define ncp_tolower(t, c) tolower(c)
#define ncp_toupper(t, c) toupper(c)
#define ncp_io2vol(S,m,i,n,k,U) ncp__io2vol(m,i,n,k,U)
#define ncp_vol2io(S,m,i,n,k,U) ncp__vol2io(m,i,n,k,U)
-static inline int ncp_strnicmp(struct nls_table *t, const unsigned char *s1,
- const unsigned char *s2, int len)
+static inline int ncp_strnicmp(const struct nls_table *t,
+ const unsigned char *s1, const unsigned char *s2, int len)
{
while (len--) {
if (tolower(*s1++) != tolower(*s2++))
@@ -193,7 +193,7 @@ ncp_renew_dentries(struct dentry *parent)
struct list_head *next;
struct dentry *dentry;
- spin_lock(&dcache_lock);
+ spin_lock(&parent->d_lock);
next = parent->d_subdirs.next;
while (next != &parent->d_subdirs) {
dentry = list_entry(next, struct dentry, d_u.d_child);
@@ -205,7 +205,7 @@ ncp_renew_dentries(struct dentry *parent)
next = next->next;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&parent->d_lock);
}
static inline void
@@ -215,7 +215,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
struct list_head *next;
struct dentry *dentry;
- spin_lock(&dcache_lock);
+ spin_lock(&parent->d_lock);
next = parent->d_subdirs.next;
while (next != &parent->d_subdirs) {
dentry = list_entry(next, struct dentry, d_u.d_child);
@@ -223,7 +223,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
ncp_age_dentry(server, dentry);
next = next->next;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&parent->d_lock);
}
struct ncp_cache_head {
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index aeec017..1990165 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -9,7 +9,6 @@
#include <linux/completion.h>
#include <linux/ip.h>
#include <linux/module.h>
-#include <linux/smp_lock.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/nfs_fs.h>
@@ -17,9 +16,7 @@
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/sunrpc/svcauth_gss.h>
-#if defined(CONFIG_NFS_V4_1)
#include <linux/sunrpc/bc_xprt.h>
-#endif
#include <net/inet_sock.h>
@@ -138,6 +135,33 @@ out_err:
#if defined(CONFIG_NFS_V4_1)
/*
+ * * CB_SEQUENCE operations will fail until the callback sessionid is set.
+ * */
+int nfs4_set_callback_sessionid(struct nfs_client *clp)
+{
+ struct svc_serv *serv = clp->cl_rpcclient->cl_xprt->bc_serv;
+ struct nfs4_sessionid *bc_sid;
+
+ if (!serv->sv_bc_xprt)
+ return -EINVAL;
+
+ /* on success freed in xprt_free */
+ bc_sid = kmalloc(sizeof(struct nfs4_sessionid), GFP_KERNEL);
+ if (!bc_sid)
+ return -ENOMEM;
+ memcpy(bc_sid->data, &clp->cl_session->sess_id.data,
+ NFS4_MAX_SESSIONID_LEN);
+ spin_lock_bh(&serv->sv_cb_lock);
+ serv->sv_bc_xprt->xpt_bc_sid = bc_sid;
+ spin_unlock_bh(&serv->sv_cb_lock);
+ dprintk("%s set xpt_bc_sid=%u:%u:%u:%u for sv_bc_xprt %p\n", __func__,
+ ((u32 *)bc_sid->data)[0], ((u32 *)bc_sid->data)[1],
+ ((u32 *)bc_sid->data)[2], ((u32 *)bc_sid->data)[3],
+ serv->sv_bc_xprt);
+ return 0;
+}
+
+/*
* The callback service for NFSv4.1 callbacks
*/
static int
@@ -178,30 +202,38 @@ nfs41_callback_svc(void *vrqstp)
struct svc_rqst *
nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
{
- struct svc_xprt *bc_xprt;
- struct svc_rqst *rqstp = ERR_PTR(-ENOMEM);
+ struct svc_rqst *rqstp;
+ int ret;
- dprintk("--> %s\n", __func__);
- /* Create a svc_sock for the service */
- bc_xprt = svc_sock_create(serv, xprt->prot);
- if (!bc_xprt)
+ /*
+ * Create an svc_sock for the back channel service that shares the
+ * fore channel connection.
+ * Returns the input port (0) and sets the svc_serv bc_xprt on success
+ */
+ ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0,
+ SVC_SOCK_ANONYMOUS);
+ if (ret < 0) {
+ rqstp = ERR_PTR(ret);
goto out;
+ }
/*
* Save the svc_serv in the transport so that it can
* be referenced when the session backchannel is initialized
*/
- serv->bc_xprt = bc_xprt;
xprt->bc_serv = serv;
INIT_LIST_HEAD(&serv->sv_cb_list);
spin_lock_init(&serv->sv_cb_lock);
init_waitqueue_head(&serv->sv_cb_waitq);
rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
- if (IS_ERR(rqstp))
- svc_sock_destroy(bc_xprt);
+ if (IS_ERR(rqstp)) {
+ svc_xprt_put(serv->sv_bc_xprt);
+ serv->sv_bc_xprt = NULL;
+ }
out:
- dprintk("--> %s return %p\n", __func__, rqstp);
+ dprintk("--> %s return %ld\n", __func__,
+ IS_ERR(rqstp) ? PTR_ERR(rqstp) : 0);
return rqstp;
}
@@ -234,6 +266,10 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
struct nfs_callback_data *cb_info)
{
}
+int nfs4_set_callback_sessionid(struct nfs_client *clp)
+{
+ return 0;
+}
#endif /* CONFIG_NFS_V4_1 */
/*
@@ -329,6 +365,9 @@ static int check_gss_callback_principal(struct nfs_client *clp,
struct rpc_clnt *r = clp->cl_rpcclient;
char *p = svc_gss_principal(rqstp);
+ /* No RPC_AUTH_GSS on NFSv4.1 back channel yet */
+ if (clp->cl_minorversion != 0)
+ return SVC_DROP;
/*
* It might just be a normal user principal, in which case
* userspace won't bother to tell us the name at all.
@@ -346,6 +385,23 @@ static int check_gss_callback_principal(struct nfs_client *clp,
return SVC_OK;
}
+/* pg_authenticate method helper */
+static struct nfs_client *nfs_cb_find_client(struct svc_rqst *rqstp)
+{
+ struct nfs4_sessionid *sessionid = bc_xprt_sid(rqstp);
+ int is_cb_compound = rqstp->rq_proc == CB_COMPOUND ? 1 : 0;
+
+ dprintk("--> %s rq_proc %d\n", __func__, rqstp->rq_proc);
+ if (svc_is_backchannel(rqstp))
+ /* Sessionid (usually) set after CB_NULL ping */
+ return nfs4_find_client_sessionid(svc_addr(rqstp), sessionid,
+ is_cb_compound);
+ else
+ /* No callback identifier in pg_authenticate */
+ return nfs4_find_client_no_ident(svc_addr(rqstp));
+}
+
+/* pg_authenticate method for nfsv4 callback threads. */
static int nfs_callback_authenticate(struct svc_rqst *rqstp)
{
struct nfs_client *clp;
@@ -353,7 +409,7 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
int ret = SVC_OK;
/* Don't talk to strangers */
- clp = nfs_find_client(svc_addr(rqstp), 4);
+ clp = nfs_cb_find_client(rqstp);
if (clp == NULL)
return SVC_DROP;
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 85a7cfd..d3b44f9 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -34,10 +34,17 @@ enum nfs4_callback_opnum {
OP_CB_ILLEGAL = 10044,
};
+struct cb_process_state {
+ __be32 drc_status;
+ struct nfs_client *clp;
+ struct nfs4_sessionid *svc_sid; /* v4.1 callback service sessionid */
+};
+
struct cb_compound_hdr_arg {
unsigned int taglen;
const char *tag;
unsigned int minorversion;
+ unsigned int cb_ident; /* v4.0 callback identifier */
unsigned nops;
};
@@ -103,14 +110,23 @@ struct cb_sequenceres {
uint32_t csr_target_highestslotid;
};
-extern unsigned nfs4_callback_sequence(struct cb_sequenceargs *args,
- struct cb_sequenceres *res);
+extern __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
+ struct cb_sequenceres *res,
+ struct cb_process_state *cps);
extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation,
const nfs4_stateid *stateid);
#define RCA4_TYPE_MASK_RDATA_DLG 0
#define RCA4_TYPE_MASK_WDATA_DLG 1
+#define RCA4_TYPE_MASK_DIR_DLG 2
+#define RCA4_TYPE_MASK_FILE_LAYOUT 3
+#define RCA4_TYPE_MASK_BLK_LAYOUT 4
+#define RCA4_TYPE_MASK_OBJ_LAYOUT_MIN 8
+#define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX 9
+#define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN 12
+#define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX 15
+#define RCA4_TYPE_MASK_ALL 0xf31f
struct cb_recallanyargs {
struct sockaddr *craa_addr;
@@ -118,25 +134,52 @@ struct cb_recallanyargs {
uint32_t craa_type_mask;
};
-extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy);
+extern __be32 nfs4_callback_recallany(struct cb_recallanyargs *args,
+ void *dummy,
+ struct cb_process_state *cps);
struct cb_recallslotargs {
struct sockaddr *crsa_addr;
uint32_t crsa_target_max_slots;
};
-extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args,
- void *dummy);
+extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args,
+ void *dummy,
+ struct cb_process_state *cps);
+
+struct cb_layoutrecallargs {
+ struct sockaddr *cbl_addr;
+ uint32_t cbl_recall_type;
+ uint32_t cbl_layout_type;
+ uint32_t cbl_layoutchanged;
+ union {
+ struct {
+ struct nfs_fh cbl_fh;
+ struct pnfs_layout_range cbl_range;
+ nfs4_stateid cbl_stateid;
+ };
+ struct nfs_fsid cbl_fsid;
+ };
+};
-#endif /* CONFIG_NFS_V4_1 */
+extern unsigned nfs4_callback_layoutrecall(
+ struct cb_layoutrecallargs *args,
+ void *dummy, struct cb_process_state *cps);
-extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
-extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
+extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
+extern void nfs4_cb_take_slot(struct nfs_client *clp);
+#endif /* CONFIG_NFS_V4_1 */
+extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
+ struct cb_getattrres *res,
+ struct cb_process_state *cps);
+extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
+ struct cb_process_state *cps);
#ifdef CONFIG_NFS_V4
extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
extern void nfs_callback_down(int minorversion);
extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation,
const nfs4_stateid *stateid);
+extern int nfs4_set_callback_sessionid(struct nfs_client *clp);
#endif /* CONFIG_NFS_V4 */
/*
* nfs41: Callbacks are expected to not cause substantial latency,
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 2950fca..4bb91cb 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -12,30 +12,33 @@
#include "callback.h"
#include "delegation.h"
#include "internal.h"
+#include "pnfs.h"
#ifdef NFS_DEBUG
#define NFSDBG_FACILITY NFSDBG_CALLBACK
#endif
-
-__be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
+
+__be32 nfs4_callback_getattr(struct cb_getattrargs *args,
+ struct cb_getattrres *res,
+ struct cb_process_state *cps)
{
- struct nfs_client *clp;
struct nfs_delegation *delegation;
struct nfs_inode *nfsi;
struct inode *inode;
+ res->status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
+ if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
+ goto out;
+
res->bitmap[0] = res->bitmap[1] = 0;
res->status = htonl(NFS4ERR_BADHANDLE);
- clp = nfs_find_client(args->addr, 4);
- if (clp == NULL)
- goto out;
dprintk("NFS: GETATTR callback request from %s\n",
- rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+ rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
- inode = nfs_delegation_find_inode(clp, &args->fh);
+ inode = nfs_delegation_find_inode(cps->clp, &args->fh);
if (inode == NULL)
- goto out_putclient;
+ goto out;
nfsi = NFS_I(inode);
rcu_read_lock();
delegation = rcu_dereference(nfsi->delegation);
@@ -55,49 +58,41 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *
out_iput:
rcu_read_unlock();
iput(inode);
-out_putclient:
- nfs_put_client(clp);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status));
return res->status;
}
-__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
+__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
+ struct cb_process_state *cps)
{
- struct nfs_client *clp;
struct inode *inode;
__be32 res;
- res = htonl(NFS4ERR_BADHANDLE);
- clp = nfs_find_client(args->addr, 4);
- if (clp == NULL)
+ res = htonl(NFS4ERR_OP_NOT_IN_SESSION);
+ if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
goto out;
dprintk("NFS: RECALL callback request from %s\n",
- rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
-
- do {
- struct nfs_client *prev = clp;
-
- inode = nfs_delegation_find_inode(clp, &args->fh);
- if (inode != NULL) {
- /* Set up a helper thread to actually return the delegation */
- switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
- case 0:
- res = 0;
- break;
- case -ENOENT:
- if (res != 0)
- res = htonl(NFS4ERR_BAD_STATEID);
- break;
- default:
- res = htonl(NFS4ERR_RESOURCE);
- }
- iput(inode);
- }
- clp = nfs_find_client_next(prev);
- nfs_put_client(prev);
- } while (clp != NULL);
+ rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+
+ res = htonl(NFS4ERR_BADHANDLE);
+ inode = nfs_delegation_find_inode(cps->clp, &args->fh);
+ if (inode == NULL)
+ goto out;
+ /* Set up a helper thread to actually return the delegation */
+ switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
+ case 0:
+ res = 0;
+ break;
+ case -ENOENT:
+ if (res != 0)
+ res = htonl(NFS4ERR_BAD_STATEID);
+ break;
+ default:
+ res = htonl(NFS4ERR_RESOURCE);
+ }
+ iput(inode);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
return res;
@@ -113,6 +108,139 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
#if defined(CONFIG_NFS_V4_1)
+static u32 initiate_file_draining(struct nfs_client *clp,
+ struct cb_layoutrecallargs *args)
+{
+ struct pnfs_layout_hdr *lo;
+ struct inode *ino;
+ bool found = false;
+ u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
+ LIST_HEAD(free_me_list);
+
+ spin_lock(&clp->cl_lock);
+ list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+ if (nfs_compare_fh(&args->cbl_fh,
+ &NFS_I(lo->plh_inode)->fh))
+ continue;
+ ino = igrab(lo->plh_inode);
+ if (!ino)
+ continue;
+ found = true;
+ /* Without this, layout can be freed as soon
+ * as we release cl_lock.
+ */
+ get_layout_hdr(lo);
+ break;
+ }
+ spin_unlock(&clp->cl_lock);
+ if (!found)
+ return NFS4ERR_NOMATCHING_LAYOUT;
+
+ spin_lock(&ino->i_lock);
+ if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
+ mark_matching_lsegs_invalid(lo, &free_me_list,
+ args->cbl_range.iomode))
+ rv = NFS4ERR_DELAY;
+ else
+ rv = NFS4ERR_NOMATCHING_LAYOUT;
+ pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
+ spin_unlock(&ino->i_lock);
+ pnfs_free_lseg_list(&free_me_list);
+ put_layout_hdr(lo);
+ iput(ino);
+ return rv;
+}
+
+static u32 initiate_bulk_draining(struct nfs_client *clp,
+ struct cb_layoutrecallargs *args)
+{
+ struct pnfs_layout_hdr *lo;
+ struct inode *ino;
+ u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
+ struct pnfs_layout_hdr *tmp;
+ LIST_HEAD(recall_list);
+ LIST_HEAD(free_me_list);
+ struct pnfs_layout_range range = {
+ .iomode = IOMODE_ANY,
+ .offset = 0,
+ .length = NFS4_MAX_UINT64,
+ };
+
+ spin_lock(&clp->cl_lock);
+ list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+ if ((args->cbl_recall_type == RETURN_FSID) &&
+ memcmp(&NFS_SERVER(lo->plh_inode)->fsid,
+ &args->cbl_fsid, sizeof(struct nfs_fsid)))
+ continue;
+ if (!igrab(lo->plh_inode))
+ continue;
+ get_layout_hdr(lo);
+ BUG_ON(!list_empty(&lo->plh_bulk_recall));
+ list_add(&lo->plh_bulk_recall, &recall_list);
+ }
+ spin_unlock(&clp->cl_lock);
+ list_for_each_entry_safe(lo, tmp,
+ &recall_list, plh_bulk_recall) {
+ ino = lo->plh_inode;
+ spin_lock(&ino->i_lock);
+ set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+ if (mark_matching_lsegs_invalid(lo, &free_me_list, range.iomode))
+ rv = NFS4ERR_DELAY;
+ list_del_init(&lo->plh_bulk_recall);
+ spin_unlock(&ino->i_lock);
+ put_layout_hdr(lo);
+ iput(ino);
+ }
+ pnfs_free_lseg_list(&free_me_list);
+ return rv;
+}
+
+static u32 do_callback_layoutrecall(struct nfs_client *clp,
+ struct cb_layoutrecallargs *args)
+{
+ u32 res = NFS4ERR_DELAY;
+
+ dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
+ if (test_and_set_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state))
+ goto out;
+ if (args->cbl_recall_type == RETURN_FILE)
+ res = initiate_file_draining(clp, args);
+ else
+ res = initiate_bulk_draining(clp, args);
+ clear_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state);
+out:
+ dprintk("%s returning %i\n", __func__, res);
+ return res;
+
+}
+
+__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
+ void *dummy, struct cb_process_state *cps)
+{
+ u32 res;
+
+ dprintk("%s: -->\n", __func__);
+
+ if (cps->clp)
+ res = do_callback_layoutrecall(cps->clp, args);
+ else
+ res = NFS4ERR_OP_NOT_IN_SESSION;
+
+ dprintk("%s: exit with status = %d\n", __func__, res);
+ return cpu_to_be32(res);
+}
+
+static void pnfs_recall_all_layouts(struct nfs_client *clp)
+{
+ struct cb_layoutrecallargs args;
+
+ /* Pretend we got a CB_LAYOUTRECALL(ALL) */
+ memset(&args, 0, sizeof(args));
+ args.cbl_recall_type = RETURN_ALL;
+ /* FIXME we ignore errors, what should we do? */
+ do_callback_layoutrecall(clp, &args);
+}
+
int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
{
if (delegation == NULL)
@@ -185,42 +313,6 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
}
/*
- * Returns a pointer to a held 'struct nfs_client' that matches the server's
- * address, major version number, and session ID. It is the caller's
- * responsibility to release the returned reference.
- *
- * Returns NULL if there are no connections with sessions, or if no session
- * matches the one of interest.
- */
- static struct nfs_client *find_client_with_session(
- const struct sockaddr *addr, u32 nfsversion,
- struct nfs4_sessionid *sessionid)
-{
- struct nfs_client *clp;
-
- clp = nfs_find_client(addr, 4);
- if (clp == NULL)
- return NULL;
-
- do {
- struct nfs_client *prev = clp;
-
- if (clp->cl_session != NULL) {
- if (memcmp(clp->cl_session->sess_id.data,
- sessionid->data,
- NFS4_MAX_SESSIONID_LEN) == 0) {
- /* Returns a held reference to clp */
- return clp;
- }
- }
- clp = nfs_find_client_next(prev);
- nfs_put_client(prev);
- } while (clp != NULL);
-
- return NULL;
-}
-
-/*
* For each referring call triple, check the session's slot table for
* a match. If the slot is in use and the sequence numbers match, the
* client is still waiting for a response to the original request.
@@ -276,20 +368,34 @@ out:
}
__be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
- struct cb_sequenceres *res)
+ struct cb_sequenceres *res,
+ struct cb_process_state *cps)
{
struct nfs_client *clp;
int i;
__be32 status;
+ cps->clp = NULL;
+
status = htonl(NFS4ERR_BADSESSION);
- clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid);
+ /* Incoming session must match the callback session */
+ if (memcmp(&args->csa_sessionid, cps->svc_sid, NFS4_MAX_SESSIONID_LEN))
+ goto out;
+
+ clp = nfs4_find_client_sessionid(args->csa_addr,
+ &args->csa_sessionid, 1);
if (clp == NULL)
goto out;
+ /* state manager is resetting the session */
+ if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) {
+ status = NFS4ERR_DELAY;
+ goto out;
+ }
+
status = validate_seqid(&clp->cl_session->bc_slot_table, args);
if (status)
- goto out_putclient;
+ goto out;
/*
* Check for pending referring calls. If a match is found, a
@@ -298,7 +404,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
*/
if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) {
status = htonl(NFS4ERR_DELAY);
- goto out_putclient;
+ goto out;
}
memcpy(&res->csr_sessionid, &args->csa_sessionid,
@@ -307,83 +413,93 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
res->csr_slotid = args->csa_slotid;
res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+ nfs4_cb_take_slot(clp);
+ cps->clp = clp; /* put in nfs4_callback_compound */
-out_putclient:
- nfs_put_client(clp);
out:
for (i = 0; i < args->csa_nrclists; i++)
kfree(args->csa_rclists[i].rcl_refcalls);
kfree(args->csa_rclists);
- if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP))
- res->csr_status = 0;
- else
+ if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) {
+ cps->drc_status = status;
+ status = 0;
+ } else
res->csr_status = status;
+
dprintk("%s: exit with status = %d res->csr_status %d\n", __func__,
ntohl(status), ntohl(res->csr_status));
return status;
}
-__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
+static bool
+validate_bitmap_values(unsigned long mask)
+{
+ return (mask & ~RCA4_TYPE_MASK_ALL) == 0;
+}
+
+__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
+ struct cb_process_state *cps)
{
- struct nfs_client *clp;
__be32 status;
fmode_t flags = 0;
- status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
- clp = nfs_find_client(args->craa_addr, 4);
- if (clp == NULL)
+ status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
+ if (!cps->clp) /* set in cb_sequence */
goto out;
dprintk("NFS: RECALL_ANY callback request from %s\n",
- rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+ rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+
+ status = cpu_to_be32(NFS4ERR_INVAL);
+ if (!validate_bitmap_values(args->craa_type_mask))
+ goto out;
+ status = cpu_to_be32(NFS4_OK);
if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
&args->craa_type_mask))
flags = FMODE_READ;
if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *)
&args->craa_type_mask))
flags |= FMODE_WRITE;
-
+ if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *)
+ &args->craa_type_mask))
+ pnfs_recall_all_layouts(cps->clp);
if (flags)
- nfs_expire_all_delegation_types(clp, flags);
- status = htonl(NFS4_OK);
+ nfs_expire_all_delegation_types(cps->clp, flags);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;
}
/* Reduce the fore channel's max_slots to the target value */
-__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy)
+__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
+ struct cb_process_state *cps)
{
- struct nfs_client *clp;
struct nfs4_slot_table *fc_tbl;
__be32 status;
status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
- clp = nfs_find_client(args->crsa_addr, 4);
- if (clp == NULL)
+ if (!cps->clp) /* set in cb_sequence */
goto out;
dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n",
- rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
+ rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR),
args->crsa_target_max_slots);
- fc_tbl = &clp->cl_session->fc_slot_table;
+ fc_tbl = &cps->clp->cl_session->fc_slot_table;
status = htonl(NFS4ERR_BAD_HIGH_SLOT);
if (args->crsa_target_max_slots > fc_tbl->max_slots ||
args->crsa_target_max_slots < 1)
- goto out_putclient;
+ goto out;
status = htonl(NFS4_OK);
if (args->crsa_target_max_slots == fc_tbl->max_slots)
- goto out_putclient;
+ goto out;
fc_tbl->target_max_slots = args->crsa_target_max_slots;
- nfs41_handle_recall_slot(clp);
-out_putclient:
- nfs_put_client(clp); /* balance nfs_find_client */
+ nfs41_handle_recall_slot(cps->clp);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 05af212..23112c2 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -10,8 +10,10 @@
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/slab.h>
+#include <linux/sunrpc/bc_xprt.h>
#include "nfs4_fs.h"
#include "callback.h"
+#include "internal.h"
#define CB_OP_TAGLEN_MAXSZ (512)
#define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ)
@@ -22,6 +24,7 @@
#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
#if defined(CONFIG_NFS_V4_1)
+#define CB_OP_LAYOUTRECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
#define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \
4 + 1 + 3)
#define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
@@ -33,7 +36,8 @@
/* Internal error code */
#define NFS4ERR_RESOURCE_HDR 11050
-typedef __be32 (*callback_process_op_t)(void *, void *);
+typedef __be32 (*callback_process_op_t)(void *, void *,
+ struct cb_process_state *);
typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
@@ -160,7 +164,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
hdr->minorversion = ntohl(*p++);
/* Check minor version is zero or one. */
if (hdr->minorversion <= 1) {
- p++; /* skip callback_ident */
+ hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */
} else {
printk(KERN_WARNING "%s: NFSv4 server callback with "
"illegal minor version %u!\n",
@@ -220,6 +224,66 @@ out:
#if defined(CONFIG_NFS_V4_1)
+static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct cb_layoutrecallargs *args)
+{
+ __be32 *p;
+ __be32 status = 0;
+ uint32_t iomode;
+
+ args->cbl_addr = svc_addr(rqstp);
+ p = read_buf(xdr, 4 * sizeof(uint32_t));
+ if (unlikely(p == NULL)) {
+ status = htonl(NFS4ERR_BADXDR);
+ goto out;
+ }
+
+ args->cbl_layout_type = ntohl(*p++);
+ /* Depite the spec's xdr, iomode really belongs in the FILE switch,
+ * as it is unuseable and ignored with the other types.
+ */
+ iomode = ntohl(*p++);
+ args->cbl_layoutchanged = ntohl(*p++);
+ args->cbl_recall_type = ntohl(*p++);
+
+ if (args->cbl_recall_type == RETURN_FILE) {
+ args->cbl_range.iomode = iomode;
+ status = decode_fh(xdr, &args->cbl_fh);
+ if (unlikely(status != 0))
+ goto out;
+
+ p = read_buf(xdr, 2 * sizeof(uint64_t));
+ if (unlikely(p == NULL)) {
+ status = htonl(NFS4ERR_BADXDR);
+ goto out;
+ }
+ p = xdr_decode_hyper(p, &args->cbl_range.offset);
+ p = xdr_decode_hyper(p, &args->cbl_range.length);
+ status = decode_stateid(xdr, &args->cbl_stateid);
+ if (unlikely(status != 0))
+ goto out;
+ } else if (args->cbl_recall_type == RETURN_FSID) {
+ p = read_buf(xdr, 2 * sizeof(uint64_t));
+ if (unlikely(p == NULL)) {
+ status = htonl(NFS4ERR_BADXDR);
+ goto out;
+ }
+ p = xdr_decode_hyper(p, &args->cbl_fsid.major);
+ p = xdr_decode_hyper(p, &args->cbl_fsid.minor);
+ } else if (args->cbl_recall_type != RETURN_ALL) {
+ status = htonl(NFS4ERR_BADXDR);
+ goto out;
+ }
+ dprintk("%s: ltype 0x%x iomode %d changed %d recall_type %d\n",
+ __func__,
+ args->cbl_layout_type, iomode,
+ args->cbl_layoutchanged, args->cbl_recall_type);
+out:
+ dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
+ return status;
+}
+
static __be32 decode_sessionid(struct xdr_stream *xdr,
struct nfs4_sessionid *sid)
{
@@ -574,10 +638,10 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
case OP_CB_SEQUENCE:
case OP_CB_RECALL_ANY:
case OP_CB_RECALL_SLOT:
+ case OP_CB_LAYOUTRECALL:
*op = &callback_ops[op_nr];
break;
- case OP_CB_LAYOUTRECALL:
case OP_CB_NOTIFY_DEVICEID:
case OP_CB_NOTIFY:
case OP_CB_PUSH_DELEG:
@@ -593,6 +657,37 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
return htonl(NFS_OK);
}
+static void nfs4_callback_free_slot(struct nfs4_session *session)
+{
+ struct nfs4_slot_table *tbl = &session->bc_slot_table;
+
+ spin_lock(&tbl->slot_tbl_lock);
+ /*
+ * Let the state manager know callback processing done.
+ * A single slot, so highest used slotid is either 0 or -1
+ */
+ tbl->highest_used_slotid--;
+ nfs4_check_drain_bc_complete(session);
+ spin_unlock(&tbl->slot_tbl_lock);
+}
+
+static void nfs4_cb_free_slot(struct nfs_client *clp)
+{
+ if (clp && clp->cl_session)
+ nfs4_callback_free_slot(clp->cl_session);
+}
+
+/* A single slot, so highest used slotid is either 0 or -1 */
+void nfs4_cb_take_slot(struct nfs_client *clp)
+{
+ struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table;
+
+ spin_lock(&tbl->slot_tbl_lock);
+ tbl->highest_used_slotid++;
+ BUG_ON(tbl->highest_used_slotid != 0);
+ spin_unlock(&tbl->slot_tbl_lock);
+}
+
#else /* CONFIG_NFS_V4_1 */
static __be32
@@ -601,6 +696,9 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
}
+static void nfs4_cb_free_slot(struct nfs_client *clp)
+{
+}
#endif /* CONFIG_NFS_V4_1 */
static __be32
@@ -621,7 +719,8 @@ preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op)
static __be32 process_op(uint32_t minorversion, int nop,
struct svc_rqst *rqstp,
struct xdr_stream *xdr_in, void *argp,
- struct xdr_stream *xdr_out, void *resp, int* drc_status)
+ struct xdr_stream *xdr_out, void *resp,
+ struct cb_process_state *cps)
{
struct callback_op *op = &callback_ops[0];
unsigned int op_nr;
@@ -644,8 +743,8 @@ static __be32 process_op(uint32_t minorversion, int nop,
if (status)
goto encode_hdr;
- if (*drc_status) {
- status = *drc_status;
+ if (cps->drc_status) {
+ status = cps->drc_status;
goto encode_hdr;
}
@@ -653,16 +752,10 @@ static __be32 process_op(uint32_t minorversion, int nop,
if (maxlen > 0 && maxlen < PAGE_SIZE) {
status = op->decode_args(rqstp, xdr_in, argp);
if (likely(status == 0))
- status = op->process_op(argp, resp);
+ status = op->process_op(argp, resp, cps);
} else
status = htonl(NFS4ERR_RESOURCE);
- /* Only set by OP_CB_SEQUENCE processing */
- if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) {
- *drc_status = status;
- status = 0;
- }
-
encode_hdr:
res = encode_op_hdr(xdr_out, op_nr, status);
if (unlikely(res))
@@ -681,8 +774,11 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
struct cb_compound_hdr_arg hdr_arg = { 0 };
struct cb_compound_hdr_res hdr_res = { NULL };
struct xdr_stream xdr_in, xdr_out;
- __be32 *p;
- __be32 status, drc_status = 0;
+ __be32 *p, status;
+ struct cb_process_state cps = {
+ .drc_status = 0,
+ .clp = NULL,
+ };
unsigned int nops = 0;
dprintk("%s: start\n", __func__);
@@ -696,6 +792,13 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
if (status == __constant_htonl(NFS4ERR_RESOURCE))
return rpc_garbage_args;
+ if (hdr_arg.minorversion == 0) {
+ cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident);
+ if (!cps.clp)
+ return rpc_drop_reply;
+ } else
+ cps.svc_sid = bc_xprt_sid(rqstp);
+
hdr_res.taglen = hdr_arg.taglen;
hdr_res.tag = hdr_arg.tag;
if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0)
@@ -703,7 +806,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
while (status == 0 && nops != hdr_arg.nops) {
status = process_op(hdr_arg.minorversion, nops, rqstp,
- &xdr_in, argp, &xdr_out, resp, &drc_status);
+ &xdr_in, argp, &xdr_out, resp, &cps);
nops++;
}
@@ -716,6 +819,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
*hdr_res.status = status;
*hdr_res.nops = htonl(nops);
+ nfs4_cb_free_slot(cps.clp);
+ nfs_put_client(cps.clp);
dprintk("%s: done, status = %u\n", __func__, ntohl(status));
return rpc_success;
}
@@ -739,6 +844,12 @@ static struct callback_op callback_ops[] = {
.res_maxsize = CB_OP_RECALL_RES_MAXSZ,
},
#if defined(CONFIG_NFS_V4_1)
+ [OP_CB_LAYOUTRECALL] = {
+ .process_op = (callback_process_op_t)nfs4_callback_layoutrecall,
+ .decode_args =
+ (callback_decode_arg_t)decode_layoutrecall_args,
+ .res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ,
+ },
[OP_CB_SEQUENCE] = {
.process_op = (callback_process_op_t)nfs4_callback_sequence,
.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 0870d0d..192f2f8 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -56,6 +56,30 @@ static DEFINE_SPINLOCK(nfs_client_lock);
static LIST_HEAD(nfs_client_list);
static LIST_HEAD(nfs_volume_list);
static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
+#ifdef CONFIG_NFS_V4
+static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */
+
+/*
+ * Get a unique NFSv4.0 callback identifier which will be used
+ * by the V4.0 callback service to lookup the nfs_client struct
+ */
+static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
+{
+ int ret = 0;
+
+ if (clp->rpc_ops->version != 4 || minorversion != 0)
+ return ret;
+retry:
+ if (!idr_pre_get(&cb_ident_idr, GFP_KERNEL))
+ return -ENOMEM;
+ spin_lock(&nfs_client_lock);
+ ret = idr_get_new(&cb_ident_idr, clp, &clp->cl_cb_ident);
+ spin_unlock(&nfs_client_lock);
+ if (ret == -EAGAIN)
+ goto retry;
+ return ret;
+}
+#endif /* CONFIG_NFS_V4 */
/*
* RPC cruft for NFS
@@ -144,7 +168,10 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
clp->cl_proto = cl_init->proto;
#ifdef CONFIG_NFS_V4
- INIT_LIST_HEAD(&clp->cl_delegations);
+ err = nfs_get_cb_ident_idr(clp, cl_init->minorversion);
+ if (err)
+ goto error_cleanup;
+
spin_lock_init(&clp->cl_lock);
INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
@@ -170,21 +197,17 @@ error_0:
}
#ifdef CONFIG_NFS_V4
-/*
- * Clears/puts all minor version specific parts from an nfs_client struct
- * reverting it to minorversion 0.
- */
-static void nfs4_clear_client_minor_version(struct nfs_client *clp)
-{
#ifdef CONFIG_NFS_V4_1
- if (nfs4_has_session(clp)) {
+static void nfs4_shutdown_session(struct nfs_client *clp)
+{
+ if (nfs4_has_session(clp))
nfs4_destroy_session(clp->cl_session);
- clp->cl_session = NULL;
- }
-
- clp->cl_mvops = nfs_v4_minor_ops[0];
-#endif /* CONFIG_NFS_V4_1 */
}
+#else /* CONFIG_NFS_V4_1 */
+static void nfs4_shutdown_session(struct nfs_client *clp)
+{
+}
+#endif /* CONFIG_NFS_V4_1 */
/*
* Destroy the NFS4 callback service
@@ -199,17 +222,49 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
{
if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
nfs4_kill_renewd(clp);
- nfs4_clear_client_minor_version(clp);
+ nfs4_shutdown_session(clp);
nfs4_destroy_callback(clp);
if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
nfs_idmap_delete(clp);
rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
}
+
+/* idr_remove_all is not needed as all id's are removed by nfs_put_client */
+void nfs_cleanup_cb_ident_idr(void)
+{
+ idr_destroy(&cb_ident_idr);
+}
+
+/* nfs_client_lock held */
+static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
+{
+ if (clp->cl_cb_ident)
+ idr_remove(&cb_ident_idr, clp->cl_cb_ident);
+}
+
+static void pnfs_init_server(struct nfs_server *server)
+{
+ rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
+}
+
#else
static void nfs4_shutdown_client(struct nfs_client *clp)
{
}
+
+void nfs_cleanup_cb_ident_idr(void)
+{
+}
+
+static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
+{
+}
+
+static void pnfs_init_server(struct nfs_server *server)
+{
+}
+
#endif /* CONFIG_NFS_V4 */
/*
@@ -248,6 +303,7 @@ void nfs_put_client(struct nfs_client *clp)
if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) {
list_del(&clp->cl_share_link);
+ nfs_cb_idr_remove_locked(clp);
spin_unlock(&nfs_client_lock);
BUG_ON(!list_empty(&clp->cl_superblocks));
@@ -363,70 +419,28 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
return 0;
}
-/*
- * Find a client by IP address and protocol version
- * - returns NULL if no such client
- */
-struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion)
-{
- struct nfs_client *clp;
-
- spin_lock(&nfs_client_lock);
- list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
- struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
-
- /* Don't match clients that failed to initialise properly */
- if (!(clp->cl_cons_state == NFS_CS_READY ||
- clp->cl_cons_state == NFS_CS_SESSION_INITING))
- continue;
-
- /* Different NFS versions cannot share the same nfs_client */
- if (clp->rpc_ops->version != nfsversion)
- continue;
-
- /* Match only the IP address, not the port number */
- if (!nfs_sockaddr_match_ipaddr(addr, clap))
- continue;
-
- atomic_inc(&clp->cl_count);
- spin_unlock(&nfs_client_lock);
- return clp;
- }
- spin_unlock(&nfs_client_lock);
- return NULL;
-}
-
-/*
- * Find a client by IP address and protocol version
- * - returns NULL if no such client
- */
-struct nfs_client *nfs_find_client_next(struct nfs_client *clp)
+/* Common match routine for v4.0 and v4.1 callback services */
+bool
+nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp,
+ u32 minorversion)
{
- struct sockaddr *sap = (struct sockaddr *)&clp->cl_addr;
- u32 nfsvers = clp->rpc_ops->version;
+ struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
- spin_lock(&nfs_client_lock);
- list_for_each_entry_continue(clp, &nfs_client_list, cl_share_link) {
- struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
+ /* Don't match clients that failed to initialise */
+ if (!(clp->cl_cons_state == NFS_CS_READY ||
+ clp->cl_cons_state == NFS_CS_SESSION_INITING))
+ return false;
- /* Don't match clients that failed to initialise properly */
- if (clp->cl_cons_state != NFS_CS_READY)
- continue;
+ /* Match the version and minorversion */
+ if (clp->rpc_ops->version != 4 ||
+ clp->cl_minorversion != minorversion)
+ return false;
- /* Different NFS versions cannot share the same nfs_client */
- if (clp->rpc_ops->version != nfsvers)
- continue;
+ /* Match only the IP address, not the port number */
+ if (!nfs_sockaddr_match_ipaddr(addr, clap))
+ return false;
- /* Match only the IP address, not the port number */
- if (!nfs_sockaddr_match_ipaddr(sap, clap))
- continue;
-
- atomic_inc(&clp->cl_count);
- spin_unlock(&nfs_client_lock);
- return clp;
- }
- spin_unlock(&nfs_client_lock);
- return NULL;
+ return true;
}
/*
@@ -988,6 +1002,27 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve
target->options = source->options;
}
+static void nfs_server_insert_lists(struct nfs_server *server)
+{
+ struct nfs_client *clp = server->nfs_client;
+
+ spin_lock(&nfs_client_lock);
+ list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
+ list_add_tail(&server->master_link, &nfs_volume_list);
+ spin_unlock(&nfs_client_lock);
+
+}
+
+static void nfs_server_remove_lists(struct nfs_server *server)
+{
+ spin_lock(&nfs_client_lock);
+ list_del_rcu(&server->client_link);
+ list_del(&server->master_link);
+ spin_unlock(&nfs_client_lock);
+
+ synchronize_rcu();
+}
+
/*
* Allocate and initialise a server record
*/
@@ -1004,6 +1039,7 @@ static struct nfs_server *nfs_alloc_server(void)
/* Zero out the NFS state stuff */
INIT_LIST_HEAD(&server->client_link);
INIT_LIST_HEAD(&server->master_link);
+ INIT_LIST_HEAD(&server->delegations);
atomic_set(&server->active, 0);
@@ -1019,6 +1055,8 @@ static struct nfs_server *nfs_alloc_server(void)
return NULL;
}
+ pnfs_init_server(server);
+
return server;
}
@@ -1029,11 +1067,8 @@ void nfs_free_server(struct nfs_server *server)
{
dprintk("--> nfs_free_server()\n");
+ nfs_server_remove_lists(server);
unset_pnfs_layoutdriver(server);
- spin_lock(&nfs_client_lock);
- list_del(&server->client_link);
- list_del(&server->master_link);
- spin_unlock(&nfs_client_lock);
if (server->destroy != NULL)
server->destroy(server);
@@ -1108,11 +1143,7 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
(unsigned long long) server->fsid.major,
(unsigned long long) server->fsid.minor);
- spin_lock(&nfs_client_lock);
- list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
- list_add_tail(&server->master_link, &nfs_volume_list);
- spin_unlock(&nfs_client_lock);
-
+ nfs_server_insert_lists(server);
server->mount_time = jiffies;
nfs_free_fattr(fattr);
return server;
@@ -1125,6 +1156,101 @@ error:
#ifdef CONFIG_NFS_V4
/*
+ * NFSv4.0 callback thread helper
+ *
+ * Find a client by IP address, protocol version, and minorversion
+ *
+ * Called from the pg_authenticate method. The callback identifier
+ * is not used as it has not been decoded.
+ *
+ * Returns NULL if no such client
+ */
+struct nfs_client *
+nfs4_find_client_no_ident(const struct sockaddr *addr)
+{
+ struct nfs_client *clp;
+
+ spin_lock(&nfs_client_lock);
+ list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+ if (nfs4_cb_match_client(addr, clp, 0) == false)
+ continue;
+ atomic_inc(&clp->cl_count);
+ spin_unlock(&nfs_client_lock);
+ return clp;
+ }
+ spin_unlock(&nfs_client_lock);
+ return NULL;
+}
+
+/*
+ * NFSv4.0 callback thread helper
+ *
+ * Find a client by callback identifier
+ */
+struct nfs_client *
+nfs4_find_client_ident(int cb_ident)
+{
+ struct nfs_client *clp;
+
+ spin_lock(&nfs_client_lock);
+ clp = idr_find(&cb_ident_idr, cb_ident);
+ if (clp)
+ atomic_inc(&clp->cl_count);
+ spin_unlock(&nfs_client_lock);
+ return clp;
+}
+
+#if defined(CONFIG_NFS_V4_1)
+/*
+ * NFSv4.1 callback thread helper
+ * For CB_COMPOUND calls, find a client by IP address, protocol version,
+ * minorversion, and sessionID
+ *
+ * CREATE_SESSION triggers a CB_NULL ping from servers. The callback service
+ * sessionid can only be set after the CREATE_SESSION return, so a CB_NULL
+ * can arrive before the callback sessionid is set. For CB_NULL calls,
+ * find a client by IP address protocol version, and minorversion.
+ *
+ * Returns NULL if no such client
+ */
+struct nfs_client *
+nfs4_find_client_sessionid(const struct sockaddr *addr,
+ struct nfs4_sessionid *sid, int is_cb_compound)
+{
+ struct nfs_client *clp;
+
+ spin_lock(&nfs_client_lock);
+ list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+ if (nfs4_cb_match_client(addr, clp, 1) == false)
+ continue;
+
+ if (!nfs4_has_session(clp))
+ continue;
+
+ /* Match sessionid unless cb_null call*/
+ if (is_cb_compound && (memcmp(clp->cl_session->sess_id.data,
+ sid->data, NFS4_MAX_SESSIONID_LEN) != 0))
+ continue;
+
+ atomic_inc(&clp->cl_count);
+ spin_unlock(&nfs_client_lock);
+ return clp;
+ }
+ spin_unlock(&nfs_client_lock);
+ return NULL;
+}
+
+#else /* CONFIG_NFS_V4_1 */
+
+struct nfs_client *
+nfs4_find_client_sessionid(const struct sockaddr *addr,
+ struct nfs4_sessionid *sid, int is_cb_compound)
+{
+ return NULL;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
+/*
* Initialize the NFS4 callback service
*/
static int nfs4_init_callback(struct nfs_client *clp)
@@ -1342,11 +1468,7 @@ static int nfs4_server_common_setup(struct nfs_server *server,
if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
server->namelen = NFS4_MAXNAMLEN;
- spin_lock(&nfs_client_lock);
- list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
- list_add_tail(&server->master_link, &nfs_volume_list);
- spin_unlock(&nfs_client_lock);
-
+ nfs_server_insert_lists(server);
server->mount_time = jiffies;
out:
nfs_free_fattr(fattr);
@@ -1551,11 +1673,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
if (error < 0)
goto out_free_server;
- spin_lock(&nfs_client_lock);
- list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
- list_add_tail(&server->master_link, &nfs_volume_list);
- spin_unlock(&nfs_client_lock);
-
+ nfs_server_insert_lists(server);
server->mount_time = jiffies;
nfs_free_fattr(fattr_fsinfo);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 232a7ee..364e432 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -11,7 +11,6 @@
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
-#include <linux/smp_lock.h>
#include <linux/spinlock.h>
#include <linux/nfs4.h>
@@ -41,11 +40,23 @@ static void nfs_free_delegation(struct nfs_delegation *delegation)
call_rcu(&delegation->rcu, nfs_free_delegation_callback);
}
+/**
+ * nfs_mark_delegation_referenced - set delegation's REFERENCED flag
+ * @delegation: delegation to process
+ *
+ */
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
{
set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
}
+/**
+ * nfs_have_delegation - check if inode has a delegation
+ * @inode: inode to check
+ * @flags: delegation types to check for
+ *
+ * Returns one if inode has the indicated delegation, otherwise zero.
+ */
int nfs_have_delegation(struct inode *inode, fmode_t flags)
{
struct nfs_delegation *delegation;
@@ -120,10 +131,15 @@ again:
return 0;
}
-/*
- * Set up a delegation on an inode
+/**
+ * nfs_inode_reclaim_delegation - process a delegation reclaim request
+ * @inode: inode to process
+ * @cred: credential to use for request
+ * @res: new delegation state from server
+ *
*/
-void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
+void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
+ struct nfs_openres *res)
{
struct nfs_delegation *delegation;
struct rpc_cred *oldcred = NULL;
@@ -176,38 +192,52 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
return inode;
}
-static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi,
- const nfs4_stateid *stateid,
- struct nfs_client *clp)
+static struct nfs_delegation *
+nfs_detach_delegation_locked(struct nfs_inode *nfsi,
+ struct nfs_server *server)
{
struct nfs_delegation *delegation =
rcu_dereference_protected(nfsi->delegation,
- lockdep_is_held(&clp->cl_lock));
+ lockdep_is_held(&server->nfs_client->cl_lock));
if (delegation == NULL)
goto nomatch;
+
spin_lock(&delegation->lock);
- if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
- sizeof(delegation->stateid.data)) != 0)
- goto nomatch_unlock;
list_del_rcu(&delegation->super_list);
delegation->inode = NULL;
nfsi->delegation_state = 0;
rcu_assign_pointer(nfsi->delegation, NULL);
spin_unlock(&delegation->lock);
return delegation;
-nomatch_unlock:
- spin_unlock(&delegation->lock);
nomatch:
return NULL;
}
-/*
- * Set up a delegation on an inode
+static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi,
+ struct nfs_server *server)
+{
+ struct nfs_client *clp = server->nfs_client;
+ struct nfs_delegation *delegation;
+
+ spin_lock(&clp->cl_lock);
+ delegation = nfs_detach_delegation_locked(nfsi, server);
+ spin_unlock(&clp->cl_lock);
+ return delegation;
+}
+
+/**
+ * nfs_inode_set_delegation - set up a delegation on an inode
+ * @inode: inode to which delegation applies
+ * @cred: cred to use for subsequent delegation processing
+ * @res: new delegation state from server
+ *
+ * Returns zero on success, or a negative errno value.
*/
int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
{
- struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_client *clp = server->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation, *old_delegation;
struct nfs_delegation *freeme = NULL;
@@ -228,7 +258,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
spin_lock(&clp->cl_lock);
old_delegation = rcu_dereference_protected(nfsi->delegation,
- lockdep_is_held(&clp->cl_lock));
+ lockdep_is_held(&clp->cl_lock));
if (old_delegation != NULL) {
if (memcmp(&delegation->stateid, &old_delegation->stateid,
sizeof(old_delegation->stateid)) == 0 &&
@@ -247,9 +277,9 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
delegation = NULL;
goto out;
}
- freeme = nfs_detach_delegation_locked(nfsi, NULL, clp);
+ freeme = nfs_detach_delegation_locked(nfsi, server);
}
- list_add_rcu(&delegation->super_list, &clp->cl_delegations);
+ list_add_rcu(&delegation->super_list, &server->delegations);
nfsi->delegation_state = delegation->type;
rcu_assign_pointer(nfsi->delegation, delegation);
delegation = NULL;
@@ -291,73 +321,85 @@ out:
return err;
}
-/*
- * Return all delegations that have been marked for return
+/**
+ * nfs_client_return_marked_delegations - return previously marked delegations
+ * @clp: nfs_client to process
+ *
+ * Returns zero on success, or a negative errno value.
*/
int nfs_client_return_marked_delegations(struct nfs_client *clp)
{
struct nfs_delegation *delegation;
+ struct nfs_server *server;
struct inode *inode;
int err = 0;
restart:
rcu_read_lock();
- list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
- if (!test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
- continue;
- inode = nfs_delegation_grab_inode(delegation);
- if (inode == NULL)
- continue;
- spin_lock(&clp->cl_lock);
- delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
- spin_unlock(&clp->cl_lock);
- rcu_read_unlock();
- if (delegation != NULL) {
- filemap_flush(inode->i_mapping);
- err = __nfs_inode_return_delegation(inode, delegation, 0);
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ list_for_each_entry_rcu(delegation, &server->delegations,
+ super_list) {
+ if (!test_and_clear_bit(NFS_DELEGATION_RETURN,
+ &delegation->flags))
+ continue;
+ inode = nfs_delegation_grab_inode(delegation);
+ if (inode == NULL)
+ continue;
+ delegation = nfs_detach_delegation(NFS_I(inode),
+ server);
+ rcu_read_unlock();
+
+ if (delegation != NULL) {
+ filemap_flush(inode->i_mapping);
+ err = __nfs_inode_return_delegation(inode,
+ delegation, 0);
+ }
+ iput(inode);
+ if (!err)
+ goto restart;
+ set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
+ return err;
}
- iput(inode);
- if (!err)
- goto restart;
- set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
- return err;
}
rcu_read_unlock();
return 0;
}
-/*
- * This function returns the delegation without reclaiming opens
- * or protecting against delegation reclaims.
- * It is therefore really only safe to be called from
- * nfs4_clear_inode()
+/**
+ * nfs_inode_return_delegation_noreclaim - return delegation, don't reclaim opens
+ * @inode: inode to process
+ *
+ * Does not protect against delegation reclaims, therefore really only safe
+ * to be called from nfs4_clear_inode().
*/
void nfs_inode_return_delegation_noreclaim(struct inode *inode)
{
- struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ struct nfs_server *server = NFS_SERVER(inode);
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
if (rcu_access_pointer(nfsi->delegation) != NULL) {
- spin_lock(&clp->cl_lock);
- delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
- spin_unlock(&clp->cl_lock);
+ delegation = nfs_detach_delegation(nfsi, server);
if (delegation != NULL)
nfs_do_return_delegation(inode, delegation, 0);
}
}
+/**
+ * nfs_inode_return_delegation - synchronously return a delegation
+ * @inode: inode to process
+ *
+ * Returns zero on success, or a negative errno value.
+ */
int nfs_inode_return_delegation(struct inode *inode)
{
- struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ struct nfs_server *server = NFS_SERVER(inode);
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
int err = 0;
if (rcu_access_pointer(nfsi->delegation) != NULL) {
- spin_lock(&clp->cl_lock);
- delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
- spin_unlock(&clp->cl_lock);
+ delegation = nfs_detach_delegation(nfsi, server);
if (delegation != NULL) {
nfs_wb_all(inode);
err = __nfs_inode_return_delegation(inode, delegation, 1);
@@ -366,46 +408,61 @@ int nfs_inode_return_delegation(struct inode *inode)
return err;
}
-static void nfs_mark_return_delegation(struct nfs_client *clp, struct nfs_delegation *delegation)
+static void nfs_mark_return_delegation(struct nfs_delegation *delegation)
{
+ struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client;
+
set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
}
-/*
- * Return all delegations associated to a super block
+/**
+ * nfs_super_return_all_delegations - return delegations for one superblock
+ * @sb: sb to process
+ *
*/
void nfs_super_return_all_delegations(struct super_block *sb)
{
- struct nfs_client *clp = NFS_SB(sb)->nfs_client;
+ struct nfs_server *server = NFS_SB(sb);
+ struct nfs_client *clp = server->nfs_client;
struct nfs_delegation *delegation;
if (clp == NULL)
return;
+
rcu_read_lock();
- list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+ list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
spin_lock(&delegation->lock);
- if (delegation->inode != NULL && delegation->inode->i_sb == sb)
- set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
+ set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
spin_unlock(&delegation->lock);
}
rcu_read_unlock();
+
if (nfs_client_return_marked_delegations(clp) != 0)
nfs4_schedule_state_manager(clp);
}
-static
-void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp, fmode_t flags)
+static void nfs_mark_return_all_delegation_types(struct nfs_server *server,
+ fmode_t flags)
{
struct nfs_delegation *delegation;
- rcu_read_lock();
- list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+ list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
continue;
if (delegation->type & flags)
- nfs_mark_return_delegation(clp, delegation);
+ nfs_mark_return_delegation(delegation);
}
+}
+
+static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp,
+ fmode_t flags)
+{
+ struct nfs_server *server;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+ nfs_mark_return_all_delegation_types(server, flags);
rcu_read_unlock();
}
@@ -420,19 +477,32 @@ static void nfs_delegation_run_state_manager(struct nfs_client *clp)
nfs4_schedule_state_manager(clp);
}
+/**
+ * nfs_expire_all_delegation_types
+ * @clp: client to process
+ * @flags: delegation types to expire
+ *
+ */
void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags)
{
nfs_client_mark_return_all_delegation_types(clp, flags);
nfs_delegation_run_state_manager(clp);
}
+/**
+ * nfs_expire_all_delegations
+ * @clp: client to process
+ *
+ */
void nfs_expire_all_delegations(struct nfs_client *clp)
{
nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
}
-/*
- * Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
+/**
+ * nfs_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN
+ * @clp: client to process
+ *
*/
void nfs_handle_cb_pathdown(struct nfs_client *clp)
{
@@ -441,29 +511,43 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp)
nfs_client_mark_return_all_delegations(clp);
}
-static void nfs_client_mark_return_unreferenced_delegations(struct nfs_client *clp)
+static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
{
struct nfs_delegation *delegation;
- rcu_read_lock();
- list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+ list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
continue;
- nfs_mark_return_delegation(clp, delegation);
+ nfs_mark_return_delegation(delegation);
}
- rcu_read_unlock();
}
+/**
+ * nfs_expire_unreferenced_delegations - Eliminate unused delegations
+ * @clp: nfs_client to process
+ *
+ */
void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
{
- nfs_client_mark_return_unreferenced_delegations(clp);
+ struct nfs_server *server;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+ nfs_mark_return_unreferenced_delegations(server);
+ rcu_read_unlock();
+
nfs_delegation_run_state_manager(clp);
}
-/*
- * Asynchronous delegation recall!
+/**
+ * nfs_async_inode_return_delegation - asynchronously return a delegation
+ * @inode: inode to process
+ * @stateid: state ID information from CB_RECALL arguments
+ *
+ * Returns zero on success, or a negative errno value.
*/
-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
+int nfs_async_inode_return_delegation(struct inode *inode,
+ const nfs4_stateid *stateid)
{
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_delegation *delegation;
@@ -475,22 +559,21 @@ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *s
rcu_read_unlock();
return -ENOENT;
}
-
- nfs_mark_return_delegation(clp, delegation);
+ nfs_mark_return_delegation(delegation);
rcu_read_unlock();
+
nfs_delegation_run_state_manager(clp);
return 0;
}
-/*
- * Retrieve the inode associated with a delegation
- */
-struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle)
+static struct inode *
+nfs_delegation_find_inode_server(struct nfs_server *server,
+ const struct nfs_fh *fhandle)
{
struct nfs_delegation *delegation;
struct inode *res = NULL;
- rcu_read_lock();
- list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+
+ list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
spin_lock(&delegation->lock);
if (delegation->inode != NULL &&
nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
@@ -500,49 +583,121 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
if (res != NULL)
break;
}
+ return res;
+}
+
+/**
+ * nfs_delegation_find_inode - retrieve the inode associated with a delegation
+ * @clp: client state handle
+ * @fhandle: filehandle from a delegation recall
+ *
+ * Returns pointer to inode matching "fhandle," or NULL if a matching inode
+ * cannot be found.
+ */
+struct inode *nfs_delegation_find_inode(struct nfs_client *clp,
+ const struct nfs_fh *fhandle)
+{
+ struct nfs_server *server;
+ struct inode *res = NULL;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ res = nfs_delegation_find_inode_server(server, fhandle);
+ if (res != NULL)
+ break;
+ }
rcu_read_unlock();
return res;
}
-/*
- * Mark all delegations as needing to be reclaimed
+static void nfs_delegation_mark_reclaim_server(struct nfs_server *server)
+{
+ struct nfs_delegation *delegation;
+
+ list_for_each_entry_rcu(delegation, &server->delegations, super_list)
+ set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
+}
+
+/**
+ * nfs_delegation_mark_reclaim - mark all delegations as needing to be reclaimed
+ * @clp: nfs_client to process
+ *
*/
void nfs_delegation_mark_reclaim(struct nfs_client *clp)
{
- struct nfs_delegation *delegation;
+ struct nfs_server *server;
+
rcu_read_lock();
- list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list)
- set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+ nfs_delegation_mark_reclaim_server(server);
rcu_read_unlock();
}
-/*
- * Reap all unclaimed delegations after reboot recovery is done
+/**
+ * nfs_delegation_reap_unclaimed - reap unclaimed delegations after reboot recovery is done
+ * @clp: nfs_client to process
+ *
*/
void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
{
struct nfs_delegation *delegation;
+ struct nfs_server *server;
struct inode *inode;
+
restart:
rcu_read_lock();
- list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
- if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0)
- continue;
- inode = nfs_delegation_grab_inode(delegation);
- if (inode == NULL)
- continue;
- spin_lock(&clp->cl_lock);
- delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
- spin_unlock(&clp->cl_lock);
- rcu_read_unlock();
- if (delegation != NULL)
- nfs_free_delegation(delegation);
- iput(inode);
- goto restart;
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ list_for_each_entry_rcu(delegation, &server->delegations,
+ super_list) {
+ if (test_bit(NFS_DELEGATION_NEED_RECLAIM,
+ &delegation->flags) == 0)
+ continue;
+ inode = nfs_delegation_grab_inode(delegation);
+ if (inode == NULL)
+ continue;
+ delegation = nfs_detach_delegation(NFS_I(inode),
+ server);
+ rcu_read_unlock();
+
+ if (delegation != NULL)
+ nfs_free_delegation(delegation);
+ iput(inode);
+ goto restart;
+ }
}
rcu_read_unlock();
}
+/**
+ * nfs_delegations_present - check for existence of delegations
+ * @clp: client state handle
+ *
+ * Returns one if there are any nfs_delegation structures attached
+ * to this nfs_client.
+ */
+int nfs_delegations_present(struct nfs_client *clp)
+{
+ struct nfs_server *server;
+ int ret = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+ if (!list_empty(&server->delegations)) {
+ ret = 1;
+ break;
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+/**
+ * nfs4_copy_delegation_stateid - Copy inode's state ID information
+ * @dst: stateid data structure to fill in
+ * @inode: inode to check
+ *
+ * Returns one and fills in "dst->data" * if inode had a delegation,
+ * otherwise zero is returned.
+ */
int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 2026304..d9322e4 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -44,6 +44,7 @@ void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags);
void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
void nfs_handle_cb_pathdown(struct nfs_client *clp);
int nfs_client_return_marked_delegations(struct nfs_client *clp);
+int nfs_delegations_present(struct nfs_client *clp);
void nfs_delegation_mark_reclaim(struct nfs_client *clp);
void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 07ac384..abe4f0c 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -33,7 +33,8 @@
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/sched.h>
-#include <linux/vmalloc.h>
+#include <linux/kmemleak.h>
+#include <linux/xattr.h>
#include "delegation.h"
#include "iostat.h"
@@ -56,7 +57,7 @@ static int nfs_rename(struct inode *, struct dentry *,
struct inode *, struct dentry *);
static int nfs_fsync_dir(struct file *, int);
static loff_t nfs_llseek_dir(struct file *, loff_t, int);
-static int nfs_readdir_clear_array(struct page*, gfp_t);
+static void nfs_readdir_clear_array(struct page*);
const struct file_operations nfs_dir_operations = {
.llseek = nfs_llseek_dir,
@@ -82,8 +83,8 @@ const struct inode_operations nfs_dir_inode_operations = {
.setattr = nfs_setattr,
};
-const struct address_space_operations nfs_dir_addr_space_ops = {
- .releasepage = nfs_readdir_clear_array,
+const struct address_space_operations nfs_dir_aops = {
+ .freepage = nfs_readdir_clear_array,
};
#ifdef CONFIG_NFS_V3
@@ -124,9 +125,10 @@ const struct inode_operations nfs4_dir_inode_operations = {
.permission = nfs_permission,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
- .getxattr = nfs4_getxattr,
- .setxattr = nfs4_setxattr,
- .listxattr = nfs4_listxattr,
+ .getxattr = generic_getxattr,
+ .setxattr = generic_setxattr,
+ .listxattr = generic_listxattr,
+ .removexattr = generic_removexattr,
};
#endif /* CONFIG_NFS_V4 */
@@ -161,6 +163,7 @@ struct nfs_cache_array_entry {
u64 cookie;
u64 ino;
struct qstr string;
+ unsigned char d_type;
};
struct nfs_cache_array {
@@ -170,14 +173,13 @@ struct nfs_cache_array {
struct nfs_cache_array_entry array[0];
};
-#define MAX_READDIR_ARRAY ((PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry))
-
-typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
+typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, int);
typedef struct {
struct file *file;
struct page *page;
unsigned long page_index;
u64 *dir_cookie;
+ u64 last_cookie;
loff_t current_index;
decode_dirent_t decode;
@@ -194,9 +196,13 @@ typedef struct {
static
struct nfs_cache_array *nfs_readdir_get_array(struct page *page)
{
+ void *ptr;
if (page == NULL)
return ERR_PTR(-EIO);
- return (struct nfs_cache_array *)kmap(page);
+ ptr = kmap(page);
+ if (ptr == NULL)
+ return ERR_PTR(-ENOMEM);
+ return ptr;
}
static
@@ -209,14 +215,15 @@ void nfs_readdir_release_array(struct page *page)
* we are freeing strings created by nfs_add_to_readdir_array()
*/
static
-int nfs_readdir_clear_array(struct page *page, gfp_t mask)
+void nfs_readdir_clear_array(struct page *page)
{
- struct nfs_cache_array *array = nfs_readdir_get_array(page);
+ struct nfs_cache_array *array;
int i;
+
+ array = kmap_atomic(page, KM_USER0);
for (i = 0; i < array->size; i++)
kfree(array->array[i].string.name);
- nfs_readdir_release_array(page);
- return 0;
+ kunmap_atomic(array, KM_USER0);
}
/*
@@ -231,6 +238,11 @@ int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int le
string->name = kmemdup(name, len, GFP_KERNEL);
if (string->name == NULL)
return -ENOMEM;
+ /*
+ * Avoid a kmemleak false positive. The pointer to the name is stored
+ * in a page cache page which kmemleak does not scan.
+ */
+ kmemleak_not_leak(string->name);
string->hash = full_name_hash(name, len);
return 0;
}
@@ -244,20 +256,24 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
if (IS_ERR(array))
return PTR_ERR(array);
- ret = -EIO;
- if (array->size >= MAX_READDIR_ARRAY)
- goto out;
cache_entry = &array->array[array->size];
+
+ /* Check that this entry lies within the page bounds */
+ ret = -ENOSPC;
+ if ((char *)&cache_entry[1] - (char *)page_address(page) > PAGE_SIZE)
+ goto out;
+
cache_entry->cookie = entry->prev_cookie;
cache_entry->ino = entry->ino;
+ cache_entry->d_type = entry->d_type;
ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len);
if (ret)
goto out;
array->last_cookie = entry->cookie;
- if (entry->eof == 1)
- array->eof_index = array->size;
array->size++;
+ if (entry->eof != 0)
+ array->eof_index = array->size;
out:
nfs_readdir_release_array(page);
return ret;
@@ -272,7 +288,7 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
if (diff < 0)
goto out_eof;
if (diff >= array->size) {
- if (array->eof_index > 0)
+ if (array->eof_index >= 0)
goto out_eof;
desc->current_index += array->size;
return -EAGAIN;
@@ -281,8 +297,6 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
index = (unsigned int)diff;
*desc->dir_cookie = array->array[index].cookie;
desc->cache_entry_index = index;
- if (index == array->eof_index)
- desc->eof = 1;
return 0;
out_eof:
desc->eof = 1;
@@ -296,17 +310,16 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
int status = -EAGAIN;
for (i = 0; i < array->size; i++) {
- if (i == array->eof_index) {
- desc->eof = 1;
- status = -EBADCOOKIE;
- }
if (array->array[i].cookie == *desc->dir_cookie) {
desc->cache_entry_index = i;
- status = 0;
- break;
+ return 0;
}
}
-
+ if (array->eof_index >= 0) {
+ status = -EBADCOOKIE;
+ if (*desc->dir_cookie == array->last_cookie)
+ desc->eof = 1;
+ }
return status;
}
@@ -314,10 +327,7 @@ static
int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
{
struct nfs_cache_array *array;
- int status = -EBADCOOKIE;
-
- if (desc->dir_cookie == NULL)
- goto out;
+ int status;
array = nfs_readdir_get_array(desc->page);
if (IS_ERR(array)) {
@@ -330,6 +340,10 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
else
status = nfs_readdir_search_for_cookie(array, desc);
+ if (status == -EAGAIN) {
+ desc->last_cookie = array->last_cookie;
+ desc->page_index++;
+ }
nfs_readdir_release_array(desc->page);
out:
return status;
@@ -365,14 +379,14 @@ error:
return error;
}
-/* Fill in an entry based on the xdr code stored in desc->page */
-static
-int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct xdr_stream *stream)
+static int xdr_decode(nfs_readdir_descriptor_t *desc,
+ struct nfs_entry *entry, struct xdr_stream *xdr)
{
- __be32 *p = desc->decode(stream, entry, NFS_SERVER(desc->file->f_path.dentry->d_inode), desc->plus);
- if (IS_ERR(p))
- return PTR_ERR(p);
+ int error;
+ error = desc->decode(xdr, entry, desc->plus);
+ if (error)
+ return error;
entry->fattr->time_start = desc->timestamp;
entry->fattr->gencount = desc->gencount;
return 0;
@@ -381,13 +395,9 @@ int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct x
static
int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
{
- struct nfs_inode *node;
if (dentry->d_inode == NULL)
goto different;
- node = NFS_I(dentry->d_inode);
- if (node->fh.size != entry->fh->size)
- goto different;
- if (strncmp(node->fh.data, entry->fh->data, node->fh.size) != 0)
+ if (nfs_compare_fh(entry->fh, NFS_FH(dentry->d_inode)) != 0)
goto different;
return 1;
different:
@@ -429,7 +439,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
if (dentry == NULL)
return;
- dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+ d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
if (IS_ERR(inode))
goto out;
@@ -449,43 +459,58 @@ out:
/* Perform conversion from xdr to cache array */
static
-void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
- void *xdr_page, struct page *page, unsigned int buflen)
+int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
+ struct page **xdr_pages, struct page *page, unsigned int buflen)
{
struct xdr_stream stream;
- struct xdr_buf buf;
- __be32 *ptr = xdr_page;
- int status;
+ struct xdr_buf buf = {
+ .pages = xdr_pages,
+ .page_len = buflen,
+ .buflen = buflen,
+ .len = buflen,
+ };
+ struct page *scratch;
struct nfs_cache_array *array;
+ unsigned int count = 0;
+ int status;
- buf.head->iov_base = xdr_page;
- buf.head->iov_len = buflen;
- buf.tail->iov_len = 0;
- buf.page_base = 0;
- buf.page_len = 0;
- buf.buflen = buf.head->iov_len;
- buf.len = buf.head->iov_len;
-
- xdr_init_decode(&stream, &buf, ptr);
+ scratch = alloc_page(GFP_KERNEL);
+ if (scratch == NULL)
+ return -ENOMEM;
+ xdr_init_decode(&stream, &buf, NULL);
+ xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
do {
status = xdr_decode(desc, entry, &stream);
- if (status != 0)
+ if (status != 0) {
+ if (status == -EAGAIN)
+ status = 0;
break;
+ }
- if (nfs_readdir_add_to_array(entry, page) == -1)
- break;
- if (desc->plus == 1)
+ count++;
+
+ if (desc->plus != 0)
nfs_prime_dcache(desc->file->f_path.dentry, entry);
+
+ status = nfs_readdir_add_to_array(entry, page);
+ if (status != 0)
+ break;
} while (!entry->eof);
- if (status == -EBADCOOKIE && entry->eof) {
+ if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
array = nfs_readdir_get_array(page);
- array->eof_index = array->size - 1;
- status = 0;
- nfs_readdir_release_array(page);
+ if (!IS_ERR(array)) {
+ array->eof_index = array->size;
+ status = 0;
+ nfs_readdir_release_array(page);
+ } else
+ status = PTR_ERR(array);
}
+
+ put_page(scratch);
+ return status;
}
static
@@ -500,7 +525,6 @@ static
void nfs_readdir_free_large_page(void *ptr, struct page **pages,
unsigned int npages)
{
- vm_unmap_ram(ptr, npages);
nfs_readdir_free_pagearray(pages, npages);
}
@@ -509,9 +533,8 @@ void nfs_readdir_free_large_page(void *ptr, struct page **pages,
* to nfs_readdir_free_large_page
*/
static
-void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
+int nfs_readdir_large_page(struct page **pages, unsigned int npages)
{
- void *ptr;
unsigned int i;
for (i = 0; i < npages; i++) {
@@ -520,13 +543,11 @@ void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
goto out_freepages;
pages[i] = page;
}
+ return 0;
- ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
- if (!IS_ERR_OR_NULL(ptr))
- return ptr;
out_freepages:
nfs_readdir_free_pagearray(pages, i);
- return NULL;
+ return -ENOMEM;
}
static
@@ -537,31 +558,43 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
struct nfs_entry entry;
struct file *file = desc->file;
struct nfs_cache_array *array;
- int status = 0;
+ int status = -ENOMEM;
unsigned int array_size = ARRAY_SIZE(pages);
entry.prev_cookie = 0;
- entry.cookie = *desc->dir_cookie;
+ entry.cookie = desc->last_cookie;
entry.eof = 0;
entry.fh = nfs_alloc_fhandle();
entry.fattr = nfs_alloc_fattr();
+ entry.server = NFS_SERVER(inode);
if (entry.fh == NULL || entry.fattr == NULL)
goto out;
array = nfs_readdir_get_array(page);
+ if (IS_ERR(array)) {
+ status = PTR_ERR(array);
+ goto out;
+ }
memset(array, 0, sizeof(struct nfs_cache_array));
array->eof_index = -1;
- pages_ptr = nfs_readdir_large_page(pages, array_size);
- if (!pages_ptr)
+ status = nfs_readdir_large_page(pages, array_size);
+ if (status < 0)
goto out_release_array;
do {
+ unsigned int pglen;
status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode);
if (status < 0)
break;
- nfs_readdir_page_filler(desc, &entry, pages_ptr, page, array_size * PAGE_SIZE);
- } while (array->eof_index < 0 && array->size < MAX_READDIR_ARRAY);
+ pglen = status;
+ status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen);
+ if (status < 0) {
+ if (status == -ENOSPC)
+ status = 0;
+ break;
+ }
+ } while (array->eof_index < 0);
nfs_readdir_free_large_page(pages_ptr, pages, array_size);
out_release_array:
@@ -582,8 +615,10 @@ static
int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
{
struct inode *inode = desc->file->f_path.dentry->d_inode;
+ int ret;
- if (nfs_readdir_xdr_to_array(desc, page, inode) < 0)
+ ret = nfs_readdir_xdr_to_array(desc, page, inode);
+ if (ret < 0)
goto error;
SetPageUptodate(page);
@@ -595,12 +630,14 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
return 0;
error:
unlock_page(page);
- return -EIO;
+ return ret;
}
static
void cache_page_release(nfs_readdir_descriptor_t *desc)
{
+ if (!desc->page->mapping)
+ nfs_readdir_clear_array(desc->page);
page_cache_release(desc->page);
desc->page = NULL;
}
@@ -608,12 +645,8 @@ void cache_page_release(nfs_readdir_descriptor_t *desc)
static
struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
{
- struct page *page;
- page = read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping,
+ return read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping,
desc->page_index, (filler_t *)nfs_readdir_filler, desc);
- if (IS_ERR(page))
- desc->eof = 1;
- return page;
}
/*
@@ -629,9 +662,8 @@ int find_cache_page(nfs_readdir_descriptor_t *desc)
return PTR_ERR(desc->page);
res = nfs_readdir_search_array(desc);
- if (res == 0)
- return 0;
- cache_page_release(desc);
+ if (res != 0)
+ cache_page_release(desc);
return res;
}
@@ -639,22 +671,18 @@ int find_cache_page(nfs_readdir_descriptor_t *desc)
static inline
int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
{
- int res = -EAGAIN;
+ int res;
- while (1) {
- res = find_cache_page(desc);
- if (res != -EAGAIN)
- break;
- desc->page_index++;
+ if (desc->page_index == 0) {
+ desc->current_index = 0;
+ desc->last_cookie = 0;
}
+ do {
+ res = find_cache_page(desc);
+ } while (res == -EAGAIN);
return res;
}
-static inline unsigned int dt_type(struct inode *inode)
-{
- return (inode->i_mode >> 12) & 15;
-}
-
/*
* Once we've found the start of the dirent within a page: fill 'er up...
*/
@@ -666,35 +694,35 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
int i = 0;
int res = 0;
struct nfs_cache_array *array = NULL;
- unsigned int d_type = DT_UNKNOWN;
- struct dentry *dentry = NULL;
array = nfs_readdir_get_array(desc->page);
+ if (IS_ERR(array)) {
+ res = PTR_ERR(array);
+ goto out;
+ }
for (i = desc->cache_entry_index; i < array->size; i++) {
- d_type = DT_UNKNOWN;
+ struct nfs_cache_array_entry *ent;
- res = filldir(dirent, array->array[i].string.name,
- array->array[i].string.len, file->f_pos,
- nfs_compat_user_ino64(array->array[i].ino), d_type);
- if (res < 0)
+ ent = &array->array[i];
+ if (filldir(dirent, ent->string.name, ent->string.len,
+ file->f_pos, nfs_compat_user_ino64(ent->ino),
+ ent->d_type) < 0) {
+ desc->eof = 1;
break;
+ }
file->f_pos++;
- desc->cache_entry_index = i;
if (i < (array->size-1))
*desc->dir_cookie = array->array[i+1].cookie;
else
*desc->dir_cookie = array->last_cookie;
- if (i == array->eof_index) {
- desc->eof = 1;
- break;
- }
}
+ if (array->eof_index >= 0)
+ desc->eof = 1;
nfs_readdir_release_array(desc->page);
+out:
cache_page_release(desc);
- if (dentry != NULL)
- dput(dentry);
dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
(unsigned long long)*desc->dir_cookie, res);
return res;
@@ -729,13 +757,14 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
goto out;
}
- if (nfs_readdir_xdr_to_array(desc, page, inode) == -1) {
- status = -EIO;
- goto out_release;
- }
-
desc->page_index = 0;
+ desc->last_cookie = *desc->dir_cookie;
desc->page = page;
+
+ status = nfs_readdir_xdr_to_array(desc, page, inode);
+ if (status < 0)
+ goto out_release;
+
status = nfs_do_filldir(desc, dirent, filldir);
out:
@@ -757,7 +786,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
struct inode *inode = dentry->d_inode;
nfs_readdir_descriptor_t my_desc,
*desc = &my_desc;
- int res = -ENOMEM;
+ int res;
dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -782,18 +811,18 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
if (res < 0)
goto out;
- while (desc->eof != 1) {
+ do {
res = readdir_search_pagecache(desc);
if (res == -EBADCOOKIE) {
+ res = 0;
/* This means either end of directory */
if (*desc->dir_cookie && desc->eof == 0) {
/* Or that the server has 'lost' a cookie */
res = uncached_readdir(desc, dirent, filldir);
- if (res >= 0)
+ if (res == 0)
continue;
}
- res = 0;
break;
}
if (res == -ETOOSMALL && desc->plus) {
@@ -808,11 +837,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
break;
res = nfs_do_filldir(desc, dirent, filldir);
- if (res < 0) {
- res = 0;
+ if (res < 0)
break;
- }
- }
+ } while (!desc->eof);
out:
nfs_unblock_sillyrename(dentry);
if (res > 0)
@@ -912,7 +939,8 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
* component of the path.
* We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT.
*/
-static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, unsigned int mask)
+static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd,
+ unsigned int mask)
{
if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT))
return 0;
@@ -992,7 +1020,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
* If the parent directory is seen to have changed, we throw out the
* cached dentry and do a new lookup.
*/
-static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
+static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode *dir;
struct inode *inode;
@@ -1001,6 +1029,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
struct nfs_fattr *fattr = NULL;
int error;
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
parent = dget_parent(dentry);
dir = parent->d_inode;
nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
@@ -1091,7 +1122,7 @@ out_error:
/*
* This is called from dput() when d_count is going to 0.
*/
-static int nfs_dentry_delete(struct dentry *dentry)
+static int nfs_dentry_delete(const struct dentry *dentry)
{
dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -1162,7 +1193,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
goto out;
- dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+ d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
/*
* If we're doing an exclusive create, optimize away the lookup
@@ -1191,7 +1222,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
goto out_unblock_sillyrename;
}
inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
- res = (struct dentry *)inode;
+ res = ERR_CAST(inode);
if (IS_ERR(res))
goto out_unblock_sillyrename;
@@ -1307,7 +1338,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
res = ERR_PTR(-ENAMETOOLONG);
goto out;
}
- dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+ d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
/* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash
* the dentry. */
@@ -1325,8 +1356,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
if (nd->flags & LOOKUP_CREATE) {
attr.ia_mode = nd->intent.open.create_mode;
attr.ia_valid = ATTR_MODE;
- if (!IS_POSIXACL(dir))
- attr.ia_mode &= ~current_umask();
+ attr.ia_mode &= ~current_umask();
} else {
open_flags &= ~(O_EXCL | O_CREAT);
attr.ia_valid = 0;
@@ -1345,12 +1375,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
res = NULL;
goto out;
/* This turned out not to be a regular file */
- case -EISDIR:
case -ENOTDIR:
goto no_open;
case -ELOOP:
if (!(nd->intent.open.flags & O_NOFOLLOW))
goto no_open;
+ /* case -EISDIR: */
/* case -EINVAL: */
default:
res = ERR_CAST(inode);
@@ -1692,11 +1722,9 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id,
dir->i_ino, dentry->d_name.name);
- spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count) > 1) {
+ if (dentry->d_count > 1) {
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
/* Start asynchronous writeout of the inode */
write_inode_now(dentry->d_inode, 0);
error = nfs_sillyrename(dir, dentry);
@@ -1707,7 +1735,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
need_rehash = 1;
}
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
error = nfs_safe_remove(dentry);
if (!error || error == -ENOENT) {
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
@@ -1842,7 +1869,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
- atomic_read(&new_dentry->d_count));
+ new_dentry->d_count);
/*
* For non-directories, check whether the target is busy and if so,
@@ -1860,7 +1887,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
rehash = new_dentry;
}
- if (atomic_read(&new_dentry->d_count) > 2) {
+ if (new_dentry->d_count > 2) {
int err;
/* copy the target dentry's name */
@@ -2162,11 +2189,14 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
}
-int nfs_permission(struct inode *inode, int mask)
+int nfs_permission(struct inode *inode, int mask, unsigned int flags)
{
struct rpc_cred *cred;
int res = 0;
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
nfs_inc_stats(inode, NFSIOS_VFSACCESS);
if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
@@ -2214,7 +2244,7 @@ out:
out_notsup:
res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (res == 0)
- res = generic_permission(inode, mask, NULL);
+ res = generic_permission(inode, mask, flags, NULL);
goto out;
}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 064a809..e6ace0d 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -867,13 +867,13 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
goto out;
nfs_alloc_commit_data(dreq);
- if (dreq->commit_data == NULL || count < wsize)
+ if (dreq->commit_data == NULL || count <= wsize)
sync = NFS_FILE_SYNC;
dreq->inode = inode;
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
dreq->l_ctx = nfs_get_lock_context(dreq->ctx);
- if (dreq->l_ctx != NULL)
+ if (dreq->l_ctx == NULL)
goto out_release;
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index e756075..7bf029e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -693,6 +693,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
struct inode *inode = filp->f_mapping->host;
int status = 0;
+ unsigned int saved_type = fl->fl_type;
/* Try local locking first */
posix_test_lock(filp, fl);
@@ -700,6 +701,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
/* found a conflict */
goto out;
}
+ fl->fl_type = saved_type;
if (nfs_have_delegation(inode, FMODE_READ))
goto out_noconflict;
@@ -884,6 +886,5 @@ static int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
dprintk("NFS: setlease(%s/%s, arg=%ld)\n",
file->f_path.dentry->d_parent->d_name.name,
file->f_path.dentry->d_name.name, arg);
-
return -EINVAL;
}
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index ac7b814..5596c6a 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -63,9 +63,11 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
* This again causes shrink_dcache_for_umount_subtree() to
* Oops, since the test for IS_ROOT() will fail.
*/
- spin_lock(&dcache_lock);
+ spin_lock(&sb->s_root->d_inode->i_lock);
+ spin_lock(&sb->s_root->d_lock);
list_del_init(&sb->s_root->d_alias);
- spin_unlock(&dcache_lock);
+ spin_unlock(&sb->s_root->d_lock);
+ spin_unlock(&sb->s_root->d_inode->i_lock);
}
return 0;
}
@@ -119,7 +121,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
security_d_instantiate(ret, inode);
if (ret->d_op == NULL)
- ret->d_op = server->nfs_client->rpc_ops->dentry_ops;
+ d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops);
out:
nfs_free_fattr(fsinfo.fattr);
return ret;
@@ -226,7 +228,7 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
security_d_instantiate(ret, inode);
if (ret->d_op == NULL)
- ret->d_op = server->nfs_client->rpc_ops->dentry_ops;
+ d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops);
out:
nfs_free_fattr(fattr);
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index dec47ed..1869688 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -123,7 +123,7 @@ static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
size_t desclen = typelen + namelen + 2;
*desc = kmalloc(desclen, GFP_KERNEL);
- if (!desc)
+ if (!*desc)
return -ENOMEM;
cp = *desc;
@@ -238,7 +238,7 @@ int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t bu
return nfs_idmap_lookup_name(gid, "group", buf, buflen);
}
-#else /* CONFIG_NFS_USE_IDMAPPER not defined */
+#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
#include <linux/module.h>
#include <linux/mutex.h>
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 314f571..ce00b70 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -289,6 +289,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
inode->i_fop = &nfs_dir_operations;
+ inode->i_data.a_ops = &nfs_dir_aops;
if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
/* Deal with crossing mountpoints */
@@ -1409,9 +1410,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
*/
void nfs4_evict_inode(struct inode *inode)
{
+ pnfs_destroy_layout(NFS_I(inode));
truncate_inode_pages(&inode->i_data, 0);
end_writeback(inode);
- pnfs_destroy_layout(NFS_I(inode));
/* If we are holding a delegation, return it! */
nfs_inode_return_delegation_noreclaim(inode);
/* First call standard NFS clear_inode() code */
@@ -1437,11 +1438,18 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
return &nfsi->vfs_inode;
}
-void nfs_destroy_inode(struct inode *inode)
+static void nfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
}
+void nfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, nfs_i_callback);
+}
+
static inline void nfs4_init_once(struct nfs_inode *nfsi)
{
#ifdef CONFIG_NFS_V4
@@ -1611,6 +1619,7 @@ static void __exit exit_nfs_fs(void)
#ifdef CONFIG_PROC_FS
rpc_proc_unregister("nfs");
#endif
+ nfs_cleanup_cb_ident_idr();
unregister_nfs_fs();
nfs_fs_proc_exit();
nfsiod_stop();
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index db08ff3..bfa3a34 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -128,9 +128,13 @@ extern void nfs_umount(const struct nfs_mount_request *info);
/* client.c */
extern struct rpc_program nfs_program;
+extern void nfs_cleanup_cb_ident_idr(void);
extern void nfs_put_client(struct nfs_client *);
-extern struct nfs_client *nfs_find_client(const struct sockaddr *, u32);
-extern struct nfs_client *nfs_find_client_next(struct nfs_client *);
+extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *);
+extern struct nfs_client *nfs4_find_client_ident(int);
+extern struct nfs_client *
+nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *,
+ int);
extern struct nfs_server *nfs_create_server(
const struct nfs_parsed_mount_data *,
struct nfs_fh *);
@@ -185,17 +189,20 @@ extern int __init nfs_init_directcache(void);
extern void nfs_destroy_directcache(void);
/* nfs2xdr.c */
-extern int nfs_stat_to_errno(int);
+extern int nfs_stat_to_errno(enum nfs_stat);
extern struct rpc_procinfo nfs_procedures[];
-extern __be32 *nfs_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
+extern int nfs2_decode_dirent(struct xdr_stream *,
+ struct nfs_entry *, int);
/* nfs3xdr.c */
extern struct rpc_procinfo nfs3_procedures[];
-extern __be32 *nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
+extern int nfs3_decode_dirent(struct xdr_stream *,
+ struct nfs_entry *, int);
/* nfs4xdr.c */
#ifdef CONFIG_NFS_V4
-extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
+extern int nfs4_decode_dirent(struct xdr_stream *,
+ struct nfs_entry *, int);
#endif
#ifdef CONFIG_NFS_V4_1
extern const u32 nfs41_maxread_overhead;
@@ -362,6 +369,15 @@ unsigned int nfs_page_length(struct page *page)
}
/*
+ * Convert a umode to a dirent->d_type
+ */
+static inline
+unsigned char nfs_umode_to_dtype(umode_t mode)
+{
+ return (mode >> 12) & 15;
+}
+
+/*
* Determine the number of pages in an array of length 'len' and
* with a base offset of 'base'
*/
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index eceafe7..d4c2d6b 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -236,10 +236,8 @@ void nfs_umount(const struct nfs_mount_request *info)
.authflavor = RPC_AUTH_UNIX,
.flags = RPC_CLNT_CREATE_NOPING,
};
- struct mountres result;
struct rpc_message msg = {
.rpc_argp = info->dirpath,
- .rpc_resp = &result,
};
struct rpc_clnt *clnt;
int status;
@@ -248,7 +246,7 @@ void nfs_umount(const struct nfs_mount_request *info)
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
clnt = rpc_create(&args);
- if (unlikely(IS_ERR(clnt)))
+ if (IS_ERR(clnt))
goto out_clnt_err;
dprintk("NFS: sending UMNT request for %s:%s\n",
@@ -280,29 +278,20 @@ out_call_err:
* XDR encode/decode functions for MOUNT
*/
-static int encode_mntdirpath(struct xdr_stream *xdr, const char *pathname)
+static void encode_mntdirpath(struct xdr_stream *xdr, const char *pathname)
{
const u32 pathname_len = strlen(pathname);
__be32 *p;
- if (unlikely(pathname_len > MNTPATHLEN))
- return -EIO;
-
- p = xdr_reserve_space(xdr, sizeof(u32) + pathname_len);
- if (unlikely(p == NULL))
- return -EIO;
+ BUG_ON(pathname_len > MNTPATHLEN);
+ p = xdr_reserve_space(xdr, 4 + pathname_len);
xdr_encode_opaque(p, pathname, pathname_len);
-
- return 0;
}
-static int mnt_enc_dirpath(struct rpc_rqst *req, __be32 *p,
- const char *dirpath)
+static void mnt_xdr_enc_dirpath(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const char *dirpath)
{
- struct xdr_stream xdr;
-
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- return encode_mntdirpath(&xdr, dirpath);
+ encode_mntdirpath(xdr, dirpath);
}
/*
@@ -320,10 +309,10 @@ static int decode_status(struct xdr_stream *xdr, struct mountres *res)
u32 status;
__be32 *p;
- p = xdr_inline_decode(xdr, sizeof(status));
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -EIO;
- status = ntohl(*p);
+ status = be32_to_cpup(p);
for (i = 0; i < ARRAY_SIZE(mnt_errtbl); i++) {
if (mnt_errtbl[i].status == status) {
@@ -351,18 +340,16 @@ static int decode_fhandle(struct xdr_stream *xdr, struct mountres *res)
return 0;
}
-static int mnt_dec_mountres(struct rpc_rqst *req, __be32 *p,
- struct mountres *res)
+static int mnt_xdr_dec_mountres(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct mountres *res)
{
- struct xdr_stream xdr;
int status;
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-
- status = decode_status(&xdr, res);
+ status = decode_status(xdr, res);
if (unlikely(status != 0 || res->errno != 0))
return status;
- return decode_fhandle(&xdr, res);
+ return decode_fhandle(xdr, res);
}
static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
@@ -371,10 +358,10 @@ static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
u32 status;
__be32 *p;
- p = xdr_inline_decode(xdr, sizeof(status));
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -EIO;
- status = ntohl(*p);
+ status = be32_to_cpup(p);
for (i = 0; i < ARRAY_SIZE(mnt3_errtbl); i++) {
if (mnt3_errtbl[i].status == status) {
@@ -394,11 +381,11 @@ static int decode_fhandle3(struct xdr_stream *xdr, struct mountres *res)
u32 size;
__be32 *p;
- p = xdr_inline_decode(xdr, sizeof(size));
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -EIO;
- size = ntohl(*p++);
+ size = be32_to_cpup(p);
if (size > NFS3_FHSIZE || size == 0)
return -EIO;
@@ -421,15 +408,15 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
if (*count == 0)
return 0;
- p = xdr_inline_decode(xdr, sizeof(entries));
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -EIO;
- entries = ntohl(*p);
+ entries = be32_to_cpup(p);
dprintk("NFS: received %u auth flavors\n", entries);
if (entries > NFS_MAX_SECFLAVORS)
entries = NFS_MAX_SECFLAVORS;
- p = xdr_inline_decode(xdr, sizeof(u32) * entries);
+ p = xdr_inline_decode(xdr, 4 * entries);
if (unlikely(p == NULL))
return -EIO;
@@ -437,7 +424,7 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
entries = *count;
for (i = 0; i < entries; i++) {
- flavors[i] = ntohl(*p++);
+ flavors[i] = be32_to_cpup(p++);
dprintk("NFS: auth flavor[%u]: %d\n", i, flavors[i]);
}
*count = i;
@@ -445,30 +432,28 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
return 0;
}
-static int mnt_dec_mountres3(struct rpc_rqst *req, __be32 *p,
- struct mountres *res)
+static int mnt_xdr_dec_mountres3(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct mountres *res)
{
- struct xdr_stream xdr;
int status;
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-
- status = decode_fhs_status(&xdr, res);
+ status = decode_fhs_status(xdr, res);
if (unlikely(status != 0 || res->errno != 0))
return status;
- status = decode_fhandle3(&xdr, res);
+ status = decode_fhandle3(xdr, res);
if (unlikely(status != 0)) {
res->errno = -EBADHANDLE;
return 0;
}
- return decode_auth_flavors(&xdr, res);
+ return decode_auth_flavors(xdr, res);
}
static struct rpc_procinfo mnt_procedures[] = {
[MOUNTPROC_MNT] = {
.p_proc = MOUNTPROC_MNT,
- .p_encode = (kxdrproc_t)mnt_enc_dirpath,
- .p_decode = (kxdrproc_t)mnt_dec_mountres,
+ .p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath,
+ .p_decode = (kxdrdproc_t)mnt_xdr_dec_mountres,
.p_arglen = MNT_enc_dirpath_sz,
.p_replen = MNT_dec_mountres_sz,
.p_statidx = MOUNTPROC_MNT,
@@ -476,7 +461,7 @@ static struct rpc_procinfo mnt_procedures[] = {
},
[MOUNTPROC_UMNT] = {
.p_proc = MOUNTPROC_UMNT,
- .p_encode = (kxdrproc_t)mnt_enc_dirpath,
+ .p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath,
.p_arglen = MNT_enc_dirpath_sz,
.p_statidx = MOUNTPROC_UMNT,
.p_name = "UMOUNT",
@@ -486,8 +471,8 @@ static struct rpc_procinfo mnt_procedures[] = {
static struct rpc_procinfo mnt3_procedures[] = {
[MOUNTPROC3_MNT] = {
.p_proc = MOUNTPROC3_MNT,
- .p_encode = (kxdrproc_t)mnt_enc_dirpath,
- .p_decode = (kxdrproc_t)mnt_dec_mountres3,
+ .p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath,
+ .p_decode = (kxdrdproc_t)mnt_xdr_dec_mountres3,
.p_arglen = MNT_enc_dirpath_sz,
.p_replen = MNT_dec_mountres3_sz,
.p_statidx = MOUNTPROC3_MNT,
@@ -495,7 +480,7 @@ static struct rpc_procinfo mnt3_procedures[] = {
},
[MOUNTPROC3_UMNT] = {
.p_proc = MOUNTPROC3_UMNT,
- .p_encode = (kxdrproc_t)mnt_enc_dirpath,
+ .p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath,
.p_arglen = MNT_enc_dirpath_sz,
.p_statidx = MOUNTPROC3_UMNT,
.p_name = "UMOUNT",
@@ -505,13 +490,13 @@ static struct rpc_procinfo mnt3_procedures[] = {
static struct rpc_version mnt_version1 = {
.number = 1,
- .nrprocs = 2,
+ .nrprocs = ARRAY_SIZE(mnt_procedures),
.procs = mnt_procedures,
};
static struct rpc_version mnt_version3 = {
.number = 3,
- .nrprocs = 2,
+ .nrprocs = ARRAY_SIZE(mnt3_procedures),
.procs = mnt3_procedures,
};
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index db6aa36..74aaf39 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -49,12 +49,17 @@ char *nfs_path(const char *base,
const struct dentry *dentry,
char *buffer, ssize_t buflen)
{
- char *end = buffer+buflen;
+ char *end;
int namelen;
+ unsigned seq;
+rename_retry:
+ end = buffer+buflen;
*--end = '\0';
buflen--;
- spin_lock(&dcache_lock);
+
+ seq = read_seqbegin(&rename_lock);
+ rcu_read_lock();
while (!IS_ROOT(dentry) && dentry != droot) {
namelen = dentry->d_name.len;
buflen -= namelen + 1;
@@ -65,7 +70,9 @@ char *nfs_path(const char *base,
*--end = '/';
dentry = dentry->d_parent;
}
- spin_unlock(&dcache_lock);
+ rcu_read_unlock();
+ if (read_seqretry(&rename_lock, seq))
+ goto rename_retry;
if (*end != '/') {
if (--buflen < 0)
goto Elong;
@@ -82,7 +89,9 @@ char *nfs_path(const char *base,
memcpy(end, base, namelen);
return end;
Elong_unlock:
- spin_unlock(&dcache_lock);
+ rcu_read_unlock();
+ if (read_seqretry(&rename_lock, seq))
+ goto rename_retry;
Elong:
return ERR_PTR(-ENAMETOOLONG);
}
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index e6bf457..792cb13 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -61,582 +61,1008 @@
#define NFS_readdirres_sz (1)
#define NFS_statfsres_sz (1+NFS_info_sz)
+
/*
- * Common NFS XDR functions as inlines
+ * While encoding arguments, set up the reply buffer in advance to
+ * receive reply data directly into the page cache.
*/
-static inline __be32 *
-xdr_encode_fhandle(__be32 *p, const struct nfs_fh *fhandle)
+static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
+ unsigned int base, unsigned int len,
+ unsigned int bufsize)
{
- memcpy(p, fhandle->data, NFS2_FHSIZE);
- return p + XDR_QUADLEN(NFS2_FHSIZE);
+ struct rpc_auth *auth = req->rq_cred->cr_auth;
+ unsigned int replen;
+
+ replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize;
+ xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len);
}
-static inline __be32 *
-xdr_decode_fhandle(__be32 *p, struct nfs_fh *fhandle)
+/*
+ * Handle decode buffer overflows out-of-line.
+ */
+static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
{
- /* NFSv2 handles have a fixed length */
- fhandle->size = NFS2_FHSIZE;
- memcpy(fhandle->data, p, NFS2_FHSIZE);
- return p + XDR_QUADLEN(NFS2_FHSIZE);
+ dprintk("NFS: %s prematurely hit the end of our receive buffer. "
+ "Remaining buffer length is %tu words.\n",
+ func, xdr->end - xdr->p);
}
-static inline __be32*
-xdr_encode_time(__be32 *p, struct timespec *timep)
+
+/*
+ * Encode/decode NFSv2 basic data types
+ *
+ * Basic NFSv2 data types are defined in section 2.3 of RFC 1094:
+ * "NFS: Network File System Protocol Specification".
+ *
+ * Not all basic data types have their own encoding and decoding
+ * functions. For run-time efficiency, some data types are encoded
+ * or decoded inline.
+ */
+
+/*
+ * typedef opaque nfsdata<>;
+ */
+static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result)
{
- *p++ = htonl(timep->tv_sec);
- /* Convert nanoseconds into microseconds */
- *p++ = htonl(timep->tv_nsec ? timep->tv_nsec / 1000 : 0);
+ u32 recvd, count;
+ size_t hdrlen;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ count = be32_to_cpup(p);
+ hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+ recvd = xdr->buf->len - hdrlen;
+ if (unlikely(count > recvd))
+ goto out_cheating;
+out:
+ xdr_read_pages(xdr, count);
+ result->eof = 0; /* NFSv2 does not pass EOF flag on the wire. */
+ result->count = count;
+ return count;
+out_cheating:
+ dprintk("NFS: server cheating in read result: "
+ "count %u > recvd %u\n", count, recvd);
+ count = recvd;
+ goto out;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * enum stat {
+ * NFS_OK = 0,
+ * NFSERR_PERM = 1,
+ * NFSERR_NOENT = 2,
+ * NFSERR_IO = 5,
+ * NFSERR_NXIO = 6,
+ * NFSERR_ACCES = 13,
+ * NFSERR_EXIST = 17,
+ * NFSERR_NODEV = 19,
+ * NFSERR_NOTDIR = 20,
+ * NFSERR_ISDIR = 21,
+ * NFSERR_FBIG = 27,
+ * NFSERR_NOSPC = 28,
+ * NFSERR_ROFS = 30,
+ * NFSERR_NAMETOOLONG = 63,
+ * NFSERR_NOTEMPTY = 66,
+ * NFSERR_DQUOT = 69,
+ * NFSERR_STALE = 70,
+ * NFSERR_WFLUSH = 99
+ * };
+ */
+static int decode_stat(struct xdr_stream *xdr, enum nfs_stat *status)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ *status = be32_to_cpup(p);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * 2.3.2. ftype
+ *
+ * enum ftype {
+ * NFNON = 0,
+ * NFREG = 1,
+ * NFDIR = 2,
+ * NFBLK = 3,
+ * NFCHR = 4,
+ * NFLNK = 5
+ * };
+ *
+ */
+static __be32 *xdr_decode_ftype(__be32 *p, u32 *type)
+{
+ *type = be32_to_cpup(p++);
+ if (unlikely(*type > NF2FIFO))
+ *type = NFBAD;
return p;
}
-static inline __be32*
-xdr_encode_current_server_time(__be32 *p, struct timespec *timep)
+/*
+ * 2.3.3. fhandle
+ *
+ * typedef opaque fhandle[FHSIZE];
+ */
+static void encode_fhandle(struct xdr_stream *xdr, const struct nfs_fh *fh)
+{
+ __be32 *p;
+
+ BUG_ON(fh->size != NFS2_FHSIZE);
+ p = xdr_reserve_space(xdr, NFS2_FHSIZE);
+ memcpy(p, fh->data, NFS2_FHSIZE);
+}
+
+static int decode_fhandle(struct xdr_stream *xdr, struct nfs_fh *fh)
{
- /*
- * Passing the invalid value useconds=1000000 is a
- * Sun convention for "set to current server time".
- * It's needed to make permissions checks for the
- * "touch" program across v2 mounts to Solaris and
- * Irix boxes work correctly. See description of
- * sattr in section 6.1 of "NFS Illustrated" by
- * Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5
- */
- *p++ = htonl(timep->tv_sec);
- *p++ = htonl(1000000);
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, NFS2_FHSIZE);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ fh->size = NFS2_FHSIZE;
+ memcpy(fh->data, p, NFS2_FHSIZE);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * 2.3.4. timeval
+ *
+ * struct timeval {
+ * unsigned int seconds;
+ * unsigned int useconds;
+ * };
+ */
+static __be32 *xdr_encode_time(__be32 *p, const struct timespec *timep)
+{
+ *p++ = cpu_to_be32(timep->tv_sec);
+ if (timep->tv_nsec != 0)
+ *p++ = cpu_to_be32(timep->tv_nsec / NSEC_PER_USEC);
+ else
+ *p++ = cpu_to_be32(0);
+ return p;
+}
+
+/*
+ * Passing the invalid value useconds=1000000 is a Sun convention for
+ * "set to current server time". It's needed to make permissions checks
+ * for the "touch" program across v2 mounts to Solaris and Irix servers
+ * work correctly. See description of sattr in section 6.1 of "NFS
+ * Illustrated" by Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5.
+ */
+static __be32 *xdr_encode_current_server_time(__be32 *p,
+ const struct timespec *timep)
+{
+ *p++ = cpu_to_be32(timep->tv_sec);
+ *p++ = cpu_to_be32(1000000);
return p;
}
-static inline __be32*
-xdr_decode_time(__be32 *p, struct timespec *timep)
+static __be32 *xdr_decode_time(__be32 *p, struct timespec *timep)
{
- timep->tv_sec = ntohl(*p++);
- /* Convert microseconds into nanoseconds */
- timep->tv_nsec = ntohl(*p++) * 1000;
+ timep->tv_sec = be32_to_cpup(p++);
+ timep->tv_nsec = be32_to_cpup(p++) * NSEC_PER_USEC;
return p;
}
-static __be32 *
-xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
+/*
+ * 2.3.5. fattr
+ *
+ * struct fattr {
+ * ftype type;
+ * unsigned int mode;
+ * unsigned int nlink;
+ * unsigned int uid;
+ * unsigned int gid;
+ * unsigned int size;
+ * unsigned int blocksize;
+ * unsigned int rdev;
+ * unsigned int blocks;
+ * unsigned int fsid;
+ * unsigned int fileid;
+ * timeval atime;
+ * timeval mtime;
+ * timeval ctime;
+ * };
+ *
+ */
+static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
{
u32 rdev, type;
- type = ntohl(*p++);
- fattr->mode = ntohl(*p++);
- fattr->nlink = ntohl(*p++);
- fattr->uid = ntohl(*p++);
- fattr->gid = ntohl(*p++);
- fattr->size = ntohl(*p++);
- fattr->du.nfs2.blocksize = ntohl(*p++);
- rdev = ntohl(*p++);
- fattr->du.nfs2.blocks = ntohl(*p++);
- fattr->fsid.major = ntohl(*p++);
- fattr->fsid.minor = 0;
- fattr->fileid = ntohl(*p++);
- p = xdr_decode_time(p, &fattr->atime);
- p = xdr_decode_time(p, &fattr->mtime);
- p = xdr_decode_time(p, &fattr->ctime);
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, NFS_fattr_sz << 2);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+
fattr->valid |= NFS_ATTR_FATTR_V2;
+
+ p = xdr_decode_ftype(p, &type);
+
+ fattr->mode = be32_to_cpup(p++);
+ fattr->nlink = be32_to_cpup(p++);
+ fattr->uid = be32_to_cpup(p++);
+ fattr->gid = be32_to_cpup(p++);
+ fattr->size = be32_to_cpup(p++);
+ fattr->du.nfs2.blocksize = be32_to_cpup(p++);
+
+ rdev = be32_to_cpup(p++);
fattr->rdev = new_decode_dev(rdev);
- if (type == NFCHR && rdev == NFS2_FIFO_DEV) {
+ if (type == (u32)NFCHR && rdev == (u32)NFS2_FIFO_DEV) {
fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO;
fattr->rdev = 0;
}
+
+ fattr->du.nfs2.blocks = be32_to_cpup(p++);
+ fattr->fsid.major = be32_to_cpup(p++);
+ fattr->fsid.minor = 0;
+ fattr->fileid = be32_to_cpup(p++);
+
+ p = xdr_decode_time(p, &fattr->atime);
+ p = xdr_decode_time(p, &fattr->mtime);
+ xdr_decode_time(p, &fattr->ctime);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * 2.3.6. sattr
+ *
+ * struct sattr {
+ * unsigned int mode;
+ * unsigned int uid;
+ * unsigned int gid;
+ * unsigned int size;
+ * timeval atime;
+ * timeval mtime;
+ * };
+ */
+
+#define NFS2_SATTR_NOT_SET (0xffffffff)
+
+static __be32 *xdr_time_not_set(__be32 *p)
+{
+ *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
+ *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
return p;
}
-static inline __be32 *
-xdr_encode_sattr(__be32 *p, struct iattr *attr)
+static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr)
{
- const __be32 not_set = __constant_htonl(0xFFFFFFFF);
+ __be32 *p;
- *p++ = (attr->ia_valid & ATTR_MODE) ? htonl(attr->ia_mode) : not_set;
- *p++ = (attr->ia_valid & ATTR_UID) ? htonl(attr->ia_uid) : not_set;
- *p++ = (attr->ia_valid & ATTR_GID) ? htonl(attr->ia_gid) : not_set;
- *p++ = (attr->ia_valid & ATTR_SIZE) ? htonl(attr->ia_size) : not_set;
+ p = xdr_reserve_space(xdr, NFS_sattr_sz << 2);
- if (attr->ia_valid & ATTR_ATIME_SET) {
+ if (attr->ia_valid & ATTR_MODE)
+ *p++ = cpu_to_be32(attr->ia_mode);
+ else
+ *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
+ if (attr->ia_valid & ATTR_UID)
+ *p++ = cpu_to_be32(attr->ia_uid);
+ else
+ *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
+ if (attr->ia_valid & ATTR_GID)
+ *p++ = cpu_to_be32(attr->ia_gid);
+ else
+ *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
+ if (attr->ia_valid & ATTR_SIZE)
+ *p++ = cpu_to_be32((u32)attr->ia_size);
+ else
+ *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
+
+ if (attr->ia_valid & ATTR_ATIME_SET)
p = xdr_encode_time(p, &attr->ia_atime);
- } else if (attr->ia_valid & ATTR_ATIME) {
+ else if (attr->ia_valid & ATTR_ATIME)
p = xdr_encode_current_server_time(p, &attr->ia_atime);
- } else {
- *p++ = not_set;
- *p++ = not_set;
- }
-
- if (attr->ia_valid & ATTR_MTIME_SET) {
- p = xdr_encode_time(p, &attr->ia_mtime);
- } else if (attr->ia_valid & ATTR_MTIME) {
- p = xdr_encode_current_server_time(p, &attr->ia_mtime);
- } else {
- *p++ = not_set;
- *p++ = not_set;
- }
- return p;
+ else
+ p = xdr_time_not_set(p);
+ if (attr->ia_valid & ATTR_MTIME_SET)
+ xdr_encode_time(p, &attr->ia_mtime);
+ else if (attr->ia_valid & ATTR_MTIME)
+ xdr_encode_current_server_time(p, &attr->ia_mtime);
+ else
+ xdr_time_not_set(p);
}
/*
- * NFS encode functions
+ * 2.3.7. filename
+ *
+ * typedef string filename<MAXNAMLEN>;
*/
+static void encode_filename(struct xdr_stream *xdr,
+ const char *name, u32 length)
+{
+ __be32 *p;
+
+ BUG_ON(length > NFS2_MAXNAMLEN);
+ p = xdr_reserve_space(xdr, 4 + length);
+ xdr_encode_opaque(p, name, length);
+}
+
+static int decode_filename_inline(struct xdr_stream *xdr,
+ const char **name, u32 *length)
+{
+ __be32 *p;
+ u32 count;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ count = be32_to_cpup(p);
+ if (count > NFS3_MAXNAMLEN)
+ goto out_nametoolong;
+ p = xdr_inline_decode(xdr, count);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ *name = (const char *)p;
+ *length = count;
+ return 0;
+out_nametoolong:
+ dprintk("NFS: returned filename too long: %u\n", count);
+ return -ENAMETOOLONG;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
/*
- * Encode file handle argument
- * GETATTR, READLINK, STATFS
+ * 2.3.8. path
+ *
+ * typedef string path<MAXPATHLEN>;
*/
-static int
-nfs_xdr_fhandle(struct rpc_rqst *req, __be32 *p, struct nfs_fh *fh)
+static void encode_path(struct xdr_stream *xdr, struct page **pages, u32 length)
+{
+ __be32 *p;
+
+ BUG_ON(length > NFS2_MAXPATHLEN);
+ p = xdr_reserve_space(xdr, 4);
+ *p = cpu_to_be32(length);
+ xdr_write_pages(xdr, pages, 0, length);
+}
+
+static int decode_path(struct xdr_stream *xdr)
{
- p = xdr_encode_fhandle(p, fh);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+ u32 length, recvd;
+ size_t hdrlen;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ length = be32_to_cpup(p);
+ if (unlikely(length >= xdr->buf->page_len || length > NFS_MAXPATHLEN))
+ goto out_size;
+ hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+ recvd = xdr->buf->len - hdrlen;
+ if (unlikely(length > recvd))
+ goto out_cheating;
+
+ xdr_read_pages(xdr, length);
+ xdr_terminate_string(xdr->buf, length);
return 0;
+out_size:
+ dprintk("NFS: returned pathname too long: %u\n", length);
+ return -ENAMETOOLONG;
+out_cheating:
+ dprintk("NFS: server cheating in pathname result: "
+ "length %u > received %u\n", length, recvd);
+ return -EIO;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
/*
- * Encode SETATTR arguments
+ * 2.3.9. attrstat
+ *
+ * union attrstat switch (stat status) {
+ * case NFS_OK:
+ * fattr attributes;
+ * default:
+ * void;
+ * };
*/
-static int
-nfs_xdr_sattrargs(struct rpc_rqst *req, __be32 *p, struct nfs_sattrargs *args)
+static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result)
{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_sattr(p, args->sattr);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ enum nfs_stat status;
+ int error;
+
+ error = decode_stat(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS_OK)
+ goto out_default;
+ error = decode_fattr(xdr, result);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
}
/*
- * Encode directory ops argument
- * LOOKUP, RMDIR
+ * 2.3.10. diropargs
+ *
+ * struct diropargs {
+ * fhandle dir;
+ * filename name;
+ * };
*/
-static int
-nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args)
+static void encode_diropargs(struct xdr_stream *xdr, const struct nfs_fh *fh,
+ const char *name, u32 length)
{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_array(p, args->name, args->len);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ encode_fhandle(xdr, fh);
+ encode_filename(xdr, name, length);
}
/*
- * Encode REMOVE argument
+ * 2.3.11. diropres
+ *
+ * union diropres switch (stat status) {
+ * case NFS_OK:
+ * struct {
+ * fhandle file;
+ * fattr attributes;
+ * } diropok;
+ * default:
+ * void;
+ * };
*/
-static int
-nfs_xdr_removeargs(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args)
+static int decode_diropok(struct xdr_stream *xdr, struct nfs_diropok *result)
{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_array(p, args->name.name, args->name.len);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ int error;
+
+ error = decode_fhandle(xdr, result->fh);
+ if (unlikely(error))
+ goto out;
+ error = decode_fattr(xdr, result->fattr);
+out:
+ return error;
+}
+
+static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_stat(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS_OK)
+ goto out_default;
+ error = decode_diropok(xdr, result);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
}
+
/*
- * Arguments to a READ call. Since we read data directly into the page
- * cache, we also set up the reply iovec here so that iov[1] points
- * exactly to the page we want to fetch.
+ * NFSv2 XDR encode functions
+ *
+ * NFSv2 argument types are defined in section 2.2 of RFC 1094:
+ * "NFS: Network File System Protocol Specification".
*/
-static int
-nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
+
+static void nfs2_xdr_enc_fhandle(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_fh *fh)
{
- struct rpc_auth *auth = req->rq_cred->cr_auth;
- unsigned int replen;
- u32 offset = (u32)args->offset;
+ encode_fhandle(xdr, fh);
+}
+
+/*
+ * 2.2.3. sattrargs
+ *
+ * struct sattrargs {
+ * fhandle file;
+ * sattr attributes;
+ * };
+ */
+static void nfs2_xdr_enc_sattrargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_sattrargs *args)
+{
+ encode_fhandle(xdr, args->fh);
+ encode_sattr(xdr, args->sattr);
+}
+
+static void nfs2_xdr_enc_diropargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_diropargs *args)
+{
+ encode_diropargs(xdr, args->fh, args->name, args->len);
+}
+
+static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_readlinkargs *args)
+{
+ encode_fhandle(xdr, args->fh);
+ prepare_reply_buffer(req, args->pages, args->pgbase,
+ args->pglen, NFS_readlinkres_sz);
+}
+
+/*
+ * 2.2.7. readargs
+ *
+ * struct readargs {
+ * fhandle file;
+ * unsigned offset;
+ * unsigned count;
+ * unsigned totalcount;
+ * };
+ */
+static void encode_readargs(struct xdr_stream *xdr,
+ const struct nfs_readargs *args)
+{
+ u32 offset = args->offset;
u32 count = args->count;
+ __be32 *p;
- p = xdr_encode_fhandle(p, args->fh);
- *p++ = htonl(offset);
- *p++ = htonl(count);
- *p++ = htonl(count);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+ encode_fhandle(xdr, args->fh);
+
+ p = xdr_reserve_space(xdr, 4 + 4 + 4);
+ *p++ = cpu_to_be32(offset);
+ *p++ = cpu_to_be32(count);
+ *p = cpu_to_be32(count);
+}
- /* Inline the page array */
- replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2;
- xdr_inline_pages(&req->rq_rcv_buf, replen,
- args->pages, args->pgbase, count);
+static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_readargs *args)
+{
+ encode_readargs(xdr, args);
+ prepare_reply_buffer(req, args->pages, args->pgbase,
+ args->count, NFS_readres_sz);
req->rq_rcv_buf.flags |= XDRBUF_READ;
- return 0;
}
/*
- * Decode READ reply
+ * 2.2.9. writeargs
+ *
+ * struct writeargs {
+ * fhandle file;
+ * unsigned beginoffset;
+ * unsigned offset;
+ * unsigned totalcount;
+ * nfsdata data;
+ * };
*/
-static int
-nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
+static void encode_writeargs(struct xdr_stream *xdr,
+ const struct nfs_writeargs *args)
{
- struct kvec *iov = req->rq_rcv_buf.head;
- size_t hdrlen;
- u32 count, recvd;
- int status;
-
- if ((status = ntohl(*p++)))
- return nfs_stat_to_errno(status);
- p = xdr_decode_fattr(p, res->fattr);
-
- count = ntohl(*p++);
- res->eof = 0;
- hdrlen = (u8 *) p - (u8 *) iov->iov_base;
- if (iov->iov_len < hdrlen) {
- dprintk("NFS: READ reply header overflowed:"
- "length %Zu > %Zu\n", hdrlen, iov->iov_len);
- return -errno_NFSERR_IO;
- } else if (iov->iov_len != hdrlen) {
- dprintk("NFS: READ header is short. iovec will be shifted.\n");
- xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen);
- }
+ u32 offset = args->offset;
+ u32 count = args->count;
+ __be32 *p;
- recvd = req->rq_rcv_buf.len - hdrlen;
- if (count > recvd) {
- dprintk("NFS: server cheating in read reply: "
- "count %u > recvd %u\n", count, recvd);
- count = recvd;
- }
+ encode_fhandle(xdr, args->fh);
- dprintk("RPC: readres OK count %u\n", count);
- if (count < res->count)
- res->count = count;
+ p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4);
+ *p++ = cpu_to_be32(offset);
+ *p++ = cpu_to_be32(offset);
+ *p++ = cpu_to_be32(count);
- return count;
+ /* nfsdata */
+ *p = cpu_to_be32(count);
+ xdr_write_pages(xdr, args->pages, args->pgbase, count);
}
+static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_writeargs *args)
+{
+ encode_writeargs(xdr, args);
+ xdr->buf->flags |= XDRBUF_WRITE;
+}
/*
- * Write arguments. Splice the buffer to be written into the iovec.
+ * 2.2.10. createargs
+ *
+ * struct createargs {
+ * diropargs where;
+ * sattr attributes;
+ * };
*/
-static int
-nfs_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
+static void nfs2_xdr_enc_createargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_createargs *args)
{
- struct xdr_buf *sndbuf = &req->rq_snd_buf;
- u32 offset = (u32)args->offset;
- u32 count = args->count;
-
- p = xdr_encode_fhandle(p, args->fh);
- *p++ = htonl(offset);
- *p++ = htonl(offset);
- *p++ = htonl(count);
- *p++ = htonl(count);
- sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
+ encode_diropargs(xdr, args->fh, args->name, args->len);
+ encode_sattr(xdr, args->sattr);
+}
- /* Copy the page array */
- xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
- sndbuf->flags |= XDRBUF_WRITE;
- return 0;
+static void nfs2_xdr_enc_removeargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_removeargs *args)
+{
+ encode_diropargs(xdr, args->fh, args->name.name, args->name.len);
}
/*
- * Encode create arguments
- * CREATE, MKDIR
+ * 2.2.12. renameargs
+ *
+ * struct renameargs {
+ * diropargs from;
+ * diropargs to;
+ * };
*/
-static int
-nfs_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs_createargs *args)
+static void nfs2_xdr_enc_renameargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_renameargs *args)
{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_array(p, args->name, args->len);
- p = xdr_encode_sattr(p, args->sattr);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ const struct qstr *old = args->old_name;
+ const struct qstr *new = args->new_name;
+
+ encode_diropargs(xdr, args->old_dir, old->name, old->len);
+ encode_diropargs(xdr, args->new_dir, new->name, new->len);
}
/*
- * Encode RENAME arguments
+ * 2.2.13. linkargs
+ *
+ * struct linkargs {
+ * fhandle from;
+ * diropargs to;
+ * };
*/
-static int
-nfs_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args)
+static void nfs2_xdr_enc_linkargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_linkargs *args)
{
- p = xdr_encode_fhandle(p, args->old_dir);
- p = xdr_encode_array(p, args->old_name->name, args->old_name->len);
- p = xdr_encode_fhandle(p, args->new_dir);
- p = xdr_encode_array(p, args->new_name->name, args->new_name->len);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ encode_fhandle(xdr, args->fromfh);
+ encode_diropargs(xdr, args->tofh, args->toname, args->tolen);
}
/*
- * Encode LINK arguments
+ * 2.2.14. symlinkargs
+ *
+ * struct symlinkargs {
+ * diropargs from;
+ * path to;
+ * sattr attributes;
+ * };
*/
-static int
-nfs_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs_linkargs *args)
+static void nfs2_xdr_enc_symlinkargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_symlinkargs *args)
{
- p = xdr_encode_fhandle(p, args->fromfh);
- p = xdr_encode_fhandle(p, args->tofh);
- p = xdr_encode_array(p, args->toname, args->tolen);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ encode_diropargs(xdr, args->fromfh, args->fromname, args->fromlen);
+ encode_path(xdr, args->pages, args->pathlen);
+ encode_sattr(xdr, args->sattr);
}
/*
- * Encode SYMLINK arguments
+ * 2.2.17. readdirargs
+ *
+ * struct readdirargs {
+ * fhandle dir;
+ * nfscookie cookie;
+ * unsigned count;
+ * };
*/
-static int
-nfs_xdr_symlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_symlinkargs *args)
+static void encode_readdirargs(struct xdr_stream *xdr,
+ const struct nfs_readdirargs *args)
{
- struct xdr_buf *sndbuf = &req->rq_snd_buf;
- size_t pad;
+ __be32 *p;
- p = xdr_encode_fhandle(p, args->fromfh);
- p = xdr_encode_array(p, args->fromname, args->fromlen);
- *p++ = htonl(args->pathlen);
- sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
+ encode_fhandle(xdr, args->fh);
- xdr_encode_pages(sndbuf, args->pages, 0, args->pathlen);
+ p = xdr_reserve_space(xdr, 4 + 4);
+ *p++ = cpu_to_be32(args->cookie);
+ *p = cpu_to_be32(args->count);
+}
- /*
- * xdr_encode_pages may have added a few bytes to ensure the
- * pathname ends on a 4-byte boundary. Start encoding the
- * attributes after the pad bytes.
- */
- pad = sndbuf->tail->iov_len;
- if (pad > 0)
- p++;
- p = xdr_encode_sattr(p, args->sattr);
- sndbuf->len += xdr_adjust_iovec(sndbuf->tail, p) - pad;
- return 0;
+static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_readdirargs *args)
+{
+ encode_readdirargs(xdr, args);
+ prepare_reply_buffer(req, args->pages, 0,
+ args->count, NFS_readdirres_sz);
}
/*
- * Encode arguments to readdir call
+ * NFSv2 XDR decode functions
+ *
+ * NFSv2 result types are defined in section 2.2 of RFC 1094:
+ * "NFS: Network File System Protocol Specification".
*/
-static int
-nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args)
+
+static int nfs2_xdr_dec_stat(struct rpc_rqst *req, struct xdr_stream *xdr,
+ void *__unused)
{
- struct rpc_auth *auth = req->rq_cred->cr_auth;
- unsigned int replen;
- u32 count = args->count;
+ enum nfs_stat status;
+ int error;
+
+ error = decode_stat(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS_OK)
+ goto out_default;
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
+}
- p = xdr_encode_fhandle(p, args->fh);
- *p++ = htonl(args->cookie);
- *p++ = htonl(count); /* see above */
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_fattr *result)
+{
+ return decode_attrstat(xdr, result);
+}
- /* Inline the page array */
- replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2;
- xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count);
- return 0;
+static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_diropok *result)
+{
+ return decode_diropres(xdr, result);
}
/*
- * Decode the result of a readdir call.
- * We're not really decoding anymore, we just leave the buffer untouched
- * and only check that it is syntactically correct.
- * The real decoding happens in nfs_decode_entry below, called directly
- * from nfs_readdir for each entry.
+ * 2.2.6. readlinkres
+ *
+ * union readlinkres switch (stat status) {
+ * case NFS_OK:
+ * path data;
+ * default:
+ * void;
+ * };
*/
-static int
-nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
+static int nfs2_xdr_dec_readlinkres(struct rpc_rqst *req,
+ struct xdr_stream *xdr, void *__unused)
{
- struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
- struct kvec *iov = rcvbuf->head;
- struct page **page;
- size_t hdrlen;
- unsigned int pglen, recvd;
- int status, nr = 0;
-
- if ((status = ntohl(*p++)))
- return nfs_stat_to_errno(status);
-
- hdrlen = (u8 *) p - (u8 *) iov->iov_base;
- if (iov->iov_len < hdrlen) {
- dprintk("NFS: READDIR reply header overflowed:"
- "length %Zu > %Zu\n", hdrlen, iov->iov_len);
- return -errno_NFSERR_IO;
- } else if (iov->iov_len != hdrlen) {
- dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
- xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
- }
+ enum nfs_stat status;
+ int error;
+
+ error = decode_stat(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS_OK)
+ goto out_default;
+ error = decode_path(xdr);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
+}
- pglen = rcvbuf->page_len;
- recvd = rcvbuf->len - hdrlen;
- if (pglen > recvd)
- pglen = recvd;
- page = rcvbuf->pages;
- return nr;
+/*
+ * 2.2.7. readres
+ *
+ * union readres switch (stat status) {
+ * case NFS_OK:
+ * fattr attributes;
+ * nfsdata data;
+ * default:
+ * void;
+ * };
+ */
+static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_readres *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_stat(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS_OK)
+ goto out_default;
+ error = decode_fattr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ error = decode_nfsdata(xdr, result);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
}
-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_writeres *result)
{
- dprintk("nfs: %s: prematurely hit end of receive buffer. "
- "Remaining buffer length is %tu words.\n",
- func, xdr->end - xdr->p);
+ /* All NFSv2 writes are "file sync" writes */
+ result->verf->committed = NFS_FILE_SYNC;
+ return decode_attrstat(xdr, result->fattr);
}
-__be32 *
-nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus)
+/**
+ * nfs2_decode_dirent - Decode a single NFSv2 directory entry stored in
+ * the local page cache.
+ * @xdr: XDR stream where entry resides
+ * @entry: buffer to fill in with entry data
+ * @plus: boolean indicating whether this should be a readdirplus entry
+ *
+ * Returns zero if successful, otherwise a negative errno value is
+ * returned.
+ *
+ * This function is not invoked during READDIR reply decoding, but
+ * rather whenever an application invokes the getdents(2) system call
+ * on a directory already in our cache.
+ *
+ * 2.2.17. entry
+ *
+ * struct entry {
+ * unsigned fileid;
+ * filename name;
+ * nfscookie cookie;
+ * entry *nextentry;
+ * };
+ */
+int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
+ int plus)
{
__be32 *p;
+ int error;
+
p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
+ if (unlikely(p == NULL))
goto out_overflow;
- if (!ntohl(*p++)) {
+ if (*p++ == xdr_zero) {
p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
+ if (unlikely(p == NULL))
goto out_overflow;
- if (!ntohl(*p++))
- return ERR_PTR(-EAGAIN);
+ if (*p++ == xdr_zero)
+ return -EAGAIN;
entry->eof = 1;
- return ERR_PTR(-EBADCOOKIE);
+ return -EBADCOOKIE;
}
- p = xdr_inline_decode(xdr, 8);
- if (unlikely(!p))
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
goto out_overflow;
+ entry->ino = be32_to_cpup(p);
- entry->ino = ntohl(*p++);
- entry->len = ntohl(*p++);
+ error = decode_filename_inline(xdr, &entry->name, &entry->len);
+ if (unlikely(error))
+ return error;
- p = xdr_inline_decode(xdr, entry->len + 4);
- if (unlikely(!p))
+ /*
+ * The type (size and byte order) of nfscookie isn't defined in
+ * RFC 1094. This implementation assumes that it's an XDR uint32.
+ */
+ entry->prev_cookie = entry->cookie;
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
goto out_overflow;
- entry->name = (const char *) p;
- p += XDR_QUADLEN(entry->len);
- entry->prev_cookie = entry->cookie;
- entry->cookie = ntohl(*p++);
-
- p = xdr_inline_peek(xdr, 8);
- if (p != NULL)
- entry->eof = !p[0] && p[1];
- else
- entry->eof = 0;
+ entry->cookie = be32_to_cpup(p);
- return p;
+ entry->d_type = DT_UNKNOWN;
+
+ return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
- return ERR_PTR(-EIO);
-}
-
-/*
- * NFS XDR decode functions
- */
-/*
- * Decode simple status reply
- */
-static int
-nfs_xdr_stat(struct rpc_rqst *req, __be32 *p, void *dummy)
-{
- int status;
-
- if ((status = ntohl(*p++)) != 0)
- status = nfs_stat_to_errno(status);
- return status;
-}
-
-/*
- * Decode attrstat reply
- * GETATTR, SETATTR, WRITE
- */
-static int
-nfs_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
-{
- int status;
-
- if ((status = ntohl(*p++)))
- return nfs_stat_to_errno(status);
- xdr_decode_fattr(p, fattr);
- return 0;
+ return -EAGAIN;
}
/*
- * Decode diropres reply
- * LOOKUP, CREATE, MKDIR
+ * 2.2.17. readdirres
+ *
+ * union readdirres switch (stat status) {
+ * case NFS_OK:
+ * struct {
+ * entry *entries;
+ * bool eof;
+ * } readdirok;
+ * default:
+ * void;
+ * };
+ *
+ * Read the directory contents into the page cache, but don't
+ * touch them. The actual decoding is done by nfs2_decode_dirent()
+ * during subsequent nfs_readdir() calls.
*/
-static int
-nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res)
+static int decode_readdirok(struct xdr_stream *xdr)
{
- int status;
+ u32 recvd, pglen;
+ size_t hdrlen;
- if ((status = ntohl(*p++)))
- return nfs_stat_to_errno(status);
- p = xdr_decode_fhandle(p, res->fh);
- xdr_decode_fattr(p, res->fattr);
- return 0;
+ pglen = xdr->buf->page_len;
+ hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+ recvd = xdr->buf->len - hdrlen;
+ if (unlikely(pglen > recvd))
+ goto out_cheating;
+out:
+ xdr_read_pages(xdr, pglen);
+ return pglen;
+out_cheating:
+ dprintk("NFS: server cheating in readdir result: "
+ "pglen %u > recvd %u\n", pglen, recvd);
+ pglen = recvd;
+ goto out;
}
-/*
- * Encode READLINK args
- */
-static int
-nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args)
+static int nfs2_xdr_dec_readdirres(struct rpc_rqst *req,
+ struct xdr_stream *xdr, void *__unused)
{
- struct rpc_auth *auth = req->rq_cred->cr_auth;
- unsigned int replen;
-
- p = xdr_encode_fhandle(p, args->fh);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-
- /* Inline the page array */
- replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readlinkres_sz) << 2;
- xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen);
- return 0;
+ enum nfs_stat status;
+ int error;
+
+ error = decode_stat(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS_OK)
+ goto out_default;
+ error = decode_readdirok(xdr);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
}
/*
- * Decode READLINK reply
+ * 2.2.18. statfsres
+ *
+ * union statfsres (stat status) {
+ * case NFS_OK:
+ * struct {
+ * unsigned tsize;
+ * unsigned bsize;
+ * unsigned blocks;
+ * unsigned bfree;
+ * unsigned bavail;
+ * } info;
+ * default:
+ * void;
+ * };
*/
-static int
-nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
+static int decode_info(struct xdr_stream *xdr, struct nfs2_fsstat *result)
{
- struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
- struct kvec *iov = rcvbuf->head;
- size_t hdrlen;
- u32 len, recvd;
- int status;
-
- if ((status = ntohl(*p++)))
- return nfs_stat_to_errno(status);
- /* Convert length of symlink */
- len = ntohl(*p++);
- if (len >= rcvbuf->page_len) {
- dprintk("nfs: server returned giant symlink!\n");
- return -ENAMETOOLONG;
- }
- hdrlen = (u8 *) p - (u8 *) iov->iov_base;
- if (iov->iov_len < hdrlen) {
- dprintk("NFS: READLINK reply header overflowed:"
- "length %Zu > %Zu\n", hdrlen, iov->iov_len);
- return -errno_NFSERR_IO;
- } else if (iov->iov_len != hdrlen) {
- dprintk("NFS: READLINK header is short. iovec will be shifted.\n");
- xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
- }
- recvd = req->rq_rcv_buf.len - hdrlen;
- if (recvd < len) {
- dprintk("NFS: server cheating in readlink reply: "
- "count %u > recvd %u\n", len, recvd);
- return -EIO;
- }
+ __be32 *p;
- xdr_terminate_string(rcvbuf, len);
+ p = xdr_inline_decode(xdr, NFS_info_sz << 2);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ result->tsize = be32_to_cpup(p++);
+ result->bsize = be32_to_cpup(p++);
+ result->blocks = be32_to_cpup(p++);
+ result->bfree = be32_to_cpup(p++);
+ result->bavail = be32_to_cpup(p);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
-/*
- * Decode WRITE reply
- */
-static int
-nfs_xdr_writeres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res)
+static int nfs2_xdr_dec_statfsres(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs2_fsstat *result)
{
- res->verf->committed = NFS_FILE_SYNC;
- return nfs_xdr_attrstat(req, p, res->fattr);
+ enum nfs_stat status;
+ int error;
+
+ error = decode_stat(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS_OK)
+ goto out_default;
+ error = decode_info(xdr, result);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
}
-/*
- * Decode STATFS reply
- */
-static int
-nfs_xdr_statfsres(struct rpc_rqst *req, __be32 *p, struct nfs2_fsstat *res)
-{
- int status;
-
- if ((status = ntohl(*p++)))
- return nfs_stat_to_errno(status);
-
- res->tsize = ntohl(*p++);
- res->bsize = ntohl(*p++);
- res->blocks = ntohl(*p++);
- res->bfree = ntohl(*p++);
- res->bavail = ntohl(*p++);
- return 0;
-}
/*
* We need to translate between nfs status return values and
* the local errno values which may not be the same.
*/
-static struct {
+static const struct {
int stat;
int errno;
} nfs_errtbl[] = {
@@ -676,28 +1102,30 @@ static struct {
{ -1, -EIO }
};
-/*
- * Convert an NFS error code to a local one.
- * This one is used jointly by NFSv2 and NFSv3.
+/**
+ * nfs_stat_to_errno - convert an NFS status code to a local errno
+ * @status: NFS status code to convert
+ *
+ * Returns a local errno value, or -EIO if the NFS status code is
+ * not recognized. This function is used jointly by NFSv2 and NFSv3.
*/
-int
-nfs_stat_to_errno(int stat)
+int nfs_stat_to_errno(enum nfs_stat status)
{
int i;
for (i = 0; nfs_errtbl[i].stat != -1; i++) {
- if (nfs_errtbl[i].stat == stat)
+ if (nfs_errtbl[i].stat == (int)status)
return nfs_errtbl[i].errno;
}
- dprintk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat);
+ dprintk("NFS: Unrecognized nfs status value: %u\n", status);
return nfs_errtbl[i].errno;
}
#define PROC(proc, argtype, restype, timer) \
[NFSPROC_##proc] = { \
.p_proc = NFSPROC_##proc, \
- .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \
- .p_decode = (kxdrproc_t) nfs_xdr_##restype, \
+ .p_encode = (kxdreproc_t)nfs2_xdr_enc_##argtype, \
+ .p_decode = (kxdrdproc_t)nfs2_xdr_dec_##restype, \
.p_arglen = NFS_##argtype##_sz, \
.p_replen = NFS_##restype##_sz, \
.p_timer = timer, \
@@ -705,21 +1133,21 @@ nfs_stat_to_errno(int stat)
.p_name = #proc, \
}
struct rpc_procinfo nfs_procedures[] = {
- PROC(GETATTR, fhandle, attrstat, 1),
- PROC(SETATTR, sattrargs, attrstat, 0),
- PROC(LOOKUP, diropargs, diropres, 2),
- PROC(READLINK, readlinkargs, readlinkres, 3),
- PROC(READ, readargs, readres, 3),
- PROC(WRITE, writeargs, writeres, 4),
- PROC(CREATE, createargs, diropres, 0),
- PROC(REMOVE, removeargs, stat, 0),
- PROC(RENAME, renameargs, stat, 0),
- PROC(LINK, linkargs, stat, 0),
- PROC(SYMLINK, symlinkargs, stat, 0),
- PROC(MKDIR, createargs, diropres, 0),
- PROC(RMDIR, diropargs, stat, 0),
- PROC(READDIR, readdirargs, readdirres, 3),
- PROC(STATFS, fhandle, statfsres, 0),
+ PROC(GETATTR, fhandle, attrstat, 1),
+ PROC(SETATTR, sattrargs, attrstat, 0),
+ PROC(LOOKUP, diropargs, diropres, 2),
+ PROC(READLINK, readlinkargs, readlinkres, 3),
+ PROC(READ, readargs, readres, 3),
+ PROC(WRITE, writeargs, writeres, 4),
+ PROC(CREATE, createargs, diropres, 0),
+ PROC(REMOVE, removeargs, stat, 0),
+ PROC(RENAME, renameargs, stat, 0),
+ PROC(LINK, linkargs, stat, 0),
+ PROC(SYMLINK, symlinkargs, stat, 0),
+ PROC(MKDIR, createargs, diropres, 0),
+ PROC(RMDIR, diropargs, stat, 0),
+ PROC(READDIR, readdirargs, readdirres, 3),
+ PROC(STATFS, fhandle, statfsres, 0),
};
struct rpc_version nfs_version2 = {
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index d9a5e83..01c5e8b 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -37,18 +37,16 @@
#define NFS3_filename_sz (1+(NFS3_MAXNAMLEN>>2))
#define NFS3_path_sz (1+(NFS3_MAXPATHLEN>>2))
#define NFS3_fattr_sz (21)
-#define NFS3_wcc_attr_sz (6)
+#define NFS3_cookieverf_sz (NFS3_COOKIEVERFSIZE>>2)
+#define NFS3_wcc_attr_sz (6)
#define NFS3_pre_op_attr_sz (1+NFS3_wcc_attr_sz)
#define NFS3_post_op_attr_sz (1+NFS3_fattr_sz)
-#define NFS3_wcc_data_sz (NFS3_pre_op_attr_sz+NFS3_post_op_attr_sz)
-#define NFS3_fsstat_sz
-#define NFS3_fsinfo_sz
-#define NFS3_pathconf_sz
-#define NFS3_entry_sz (NFS3_filename_sz+3)
-
-#define NFS3_sattrargs_sz (NFS3_fh_sz+NFS3_sattr_sz+3)
+#define NFS3_wcc_data_sz (NFS3_pre_op_attr_sz+NFS3_post_op_attr_sz)
#define NFS3_diropargs_sz (NFS3_fh_sz+NFS3_filename_sz)
-#define NFS3_removeargs_sz (NFS3_fh_sz+NFS3_filename_sz)
+
+#define NFS3_getattrargs_sz (NFS3_fh_sz)
+#define NFS3_setattrargs_sz (NFS3_fh_sz+NFS3_sattr_sz+3)
+#define NFS3_lookupargs_sz (NFS3_fh_sz+NFS3_filename_sz)
#define NFS3_accessargs_sz (NFS3_fh_sz+1)
#define NFS3_readlinkargs_sz (NFS3_fh_sz)
#define NFS3_readargs_sz (NFS3_fh_sz+3)
@@ -57,14 +55,16 @@
#define NFS3_mkdirargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz)
#define NFS3_symlinkargs_sz (NFS3_diropargs_sz+1+NFS3_sattr_sz)
#define NFS3_mknodargs_sz (NFS3_diropargs_sz+2+NFS3_sattr_sz)
+#define NFS3_removeargs_sz (NFS3_fh_sz+NFS3_filename_sz)
#define NFS3_renameargs_sz (NFS3_diropargs_sz+NFS3_diropargs_sz)
#define NFS3_linkargs_sz (NFS3_fh_sz+NFS3_diropargs_sz)
-#define NFS3_readdirargs_sz (NFS3_fh_sz+2)
+#define NFS3_readdirargs_sz (NFS3_fh_sz+NFS3_cookieverf_sz+3)
+#define NFS3_readdirplusargs_sz (NFS3_fh_sz+NFS3_cookieverf_sz+4)
#define NFS3_commitargs_sz (NFS3_fh_sz+3)
-#define NFS3_attrstat_sz (1+NFS3_fattr_sz)
-#define NFS3_wccstat_sz (1+NFS3_wcc_data_sz)
-#define NFS3_removeres_sz (NFS3_wccstat_sz)
+#define NFS3_getattrres_sz (1+NFS3_fattr_sz)
+#define NFS3_setattrres_sz (1+NFS3_wcc_data_sz)
+#define NFS3_removeres_sz (NFS3_setattrres_sz)
#define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
#define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
@@ -100,1077 +100,2362 @@ static const umode_t nfs_type2fmt[] = {
[NF3FIFO] = S_IFIFO,
};
+/*
+ * While encoding arguments, set up the reply buffer in advance to
+ * receive reply data directly into the page cache.
+ */
+static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
+ unsigned int base, unsigned int len,
+ unsigned int bufsize)
+{
+ struct rpc_auth *auth = req->rq_cred->cr_auth;
+ unsigned int replen;
+
+ replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize;
+ xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len);
+}
+
+/*
+ * Handle decode buffer overflows out-of-line.
+ */
static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
{
- dprintk("nfs: %s: prematurely hit end of receive buffer. "
+ dprintk("NFS: %s prematurely hit the end of our receive buffer. "
"Remaining buffer length is %tu words.\n",
func, xdr->end - xdr->p);
}
+
/*
- * Common NFS XDR functions as inlines
+ * Encode/decode NFSv3 basic data types
+ *
+ * Basic NFSv3 data types are defined in section 2.5 of RFC 1813:
+ * "NFS Version 3 Protocol Specification".
+ *
+ * Not all basic data types have their own encoding and decoding
+ * functions. For run-time efficiency, some data types are encoded
+ * or decoded inline.
*/
-static inline __be32 *
-xdr_encode_fhandle(__be32 *p, const struct nfs_fh *fh)
+
+static void encode_uint32(struct xdr_stream *xdr, u32 value)
{
- return xdr_encode_array(p, fh->data, fh->size);
+ __be32 *p = xdr_reserve_space(xdr, 4);
+ *p = cpu_to_be32(value);
}
-static inline __be32 *
-xdr_decode_fhandle(__be32 *p, struct nfs_fh *fh)
+static int decode_uint32(struct xdr_stream *xdr, u32 *value)
{
- if ((fh->size = ntohl(*p++)) <= NFS3_FHSIZE) {
- memcpy(fh->data, p, fh->size);
- return p + XDR_QUADLEN(fh->size);
- }
- return NULL;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ *value = be32_to_cpup(p);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int decode_uint64(struct xdr_stream *xdr, u64 *value)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ xdr_decode_hyper(p, value);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * fileid3
+ *
+ * typedef uint64 fileid3;
+ */
+static __be32 *xdr_decode_fileid3(__be32 *p, u64 *fileid)
+{
+ return xdr_decode_hyper(p, fileid);
+}
+
+static int decode_fileid3(struct xdr_stream *xdr, u64 *fileid)
+{
+ return decode_uint64(xdr, fileid);
+}
+
+/*
+ * filename3
+ *
+ * typedef string filename3<>;
+ */
+static void encode_filename3(struct xdr_stream *xdr,
+ const char *name, u32 length)
+{
+ __be32 *p;
+
+ BUG_ON(length > NFS3_MAXNAMLEN);
+ p = xdr_reserve_space(xdr, 4 + length);
+ xdr_encode_opaque(p, name, length);
}
-static inline __be32 *
-xdr_decode_fhandle_stream(struct xdr_stream *xdr, struct nfs_fh *fh)
+static int decode_inline_filename3(struct xdr_stream *xdr,
+ const char **name, u32 *length)
{
__be32 *p;
+ u32 count;
+
p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ count = be32_to_cpup(p);
+ if (count > NFS3_MAXNAMLEN)
+ goto out_nametoolong;
+ p = xdr_inline_decode(xdr, count);
+ if (unlikely(p == NULL))
goto out_overflow;
- fh->size = ntohl(*p++);
+ *name = (const char *)p;
+ *length = count;
+ return 0;
- if (fh->size <= NFS3_FHSIZE) {
- p = xdr_inline_decode(xdr, fh->size);
- if (unlikely(!p))
- goto out_overflow;
- memcpy(fh->data, p, fh->size);
- return p + XDR_QUADLEN(fh->size);
- }
- return NULL;
+out_nametoolong:
+ dprintk("NFS: returned filename too long: %u\n", count);
+ return -ENAMETOOLONG;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * nfspath3
+ *
+ * typedef string nfspath3<>;
+ */
+static void encode_nfspath3(struct xdr_stream *xdr, struct page **pages,
+ const u32 length)
+{
+ BUG_ON(length > NFS3_MAXPATHLEN);
+ encode_uint32(xdr, length);
+ xdr_write_pages(xdr, pages, 0, length);
+}
+static int decode_nfspath3(struct xdr_stream *xdr)
+{
+ u32 recvd, count;
+ size_t hdrlen;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ count = be32_to_cpup(p);
+ if (unlikely(count >= xdr->buf->page_len || count > NFS3_MAXPATHLEN))
+ goto out_nametoolong;
+ hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+ recvd = xdr->buf->len - hdrlen;
+ if (unlikely(count > recvd))
+ goto out_cheating;
+
+ xdr_read_pages(xdr, count);
+ xdr_terminate_string(xdr->buf, count);
+ return 0;
+
+out_nametoolong:
+ dprintk("NFS: returned pathname too long: %u\n", count);
+ return -ENAMETOOLONG;
+out_cheating:
+ dprintk("NFS: server cheating in pathname result: "
+ "count %u > recvd %u\n", count, recvd);
+ return -EIO;
out_overflow:
print_overflow_msg(__func__, xdr);
- return ERR_PTR(-EIO);
+ return -EIO;
}
/*
- * Encode/decode time.
+ * cookie3
+ *
+ * typedef uint64 cookie3
*/
-static inline __be32 *
-xdr_encode_time3(__be32 *p, struct timespec *timep)
+static __be32 *xdr_encode_cookie3(__be32 *p, u64 cookie)
{
- *p++ = htonl(timep->tv_sec);
- *p++ = htonl(timep->tv_nsec);
- return p;
+ return xdr_encode_hyper(p, cookie);
}
-static inline __be32 *
-xdr_decode_time3(__be32 *p, struct timespec *timep)
+static int decode_cookie3(struct xdr_stream *xdr, u64 *cookie)
{
- timep->tv_sec = ntohl(*p++);
- timep->tv_nsec = ntohl(*p++);
- return p;
+ return decode_uint64(xdr, cookie);
}
-static __be32 *
-xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
+/*
+ * cookieverf3
+ *
+ * typedef opaque cookieverf3[NFS3_COOKIEVERFSIZE];
+ */
+static __be32 *xdr_encode_cookieverf3(__be32 *p, const __be32 *verifier)
+{
+ memcpy(p, verifier, NFS3_COOKIEVERFSIZE);
+ return p + XDR_QUADLEN(NFS3_COOKIEVERFSIZE);
+}
+
+static int decode_cookieverf3(struct xdr_stream *xdr, __be32 *verifier)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ memcpy(verifier, p, NFS3_COOKIEVERFSIZE);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * createverf3
+ *
+ * typedef opaque createverf3[NFS3_CREATEVERFSIZE];
+ */
+static void encode_createverf3(struct xdr_stream *xdr, const __be32 *verifier)
{
- unsigned int type, major, minor;
- umode_t fmode;
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, NFS3_CREATEVERFSIZE);
+ memcpy(p, verifier, NFS3_CREATEVERFSIZE);
+}
+
+static int decode_writeverf3(struct xdr_stream *xdr, __be32 *verifier)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, NFS3_WRITEVERFSIZE);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ memcpy(verifier, p, NFS3_WRITEVERFSIZE);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * size3
+ *
+ * typedef uint64 size3;
+ */
+static __be32 *xdr_decode_size3(__be32 *p, u64 *size)
+{
+ return xdr_decode_hyper(p, size);
+}
+
+/*
+ * nfsstat3
+ *
+ * enum nfsstat3 {
+ * NFS3_OK = 0,
+ * ...
+ * }
+ */
+#define NFS3_OK NFS_OK
- type = ntohl(*p++);
+static int decode_nfsstat3(struct xdr_stream *xdr, enum nfs_stat *status)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ *status = be32_to_cpup(p);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * ftype3
+ *
+ * enum ftype3 {
+ * NF3REG = 1,
+ * NF3DIR = 2,
+ * NF3BLK = 3,
+ * NF3CHR = 4,
+ * NF3LNK = 5,
+ * NF3SOCK = 6,
+ * NF3FIFO = 7
+ * };
+ */
+static void encode_ftype3(struct xdr_stream *xdr, const u32 type)
+{
+ BUG_ON(type > NF3FIFO);
+ encode_uint32(xdr, type);
+}
+
+static __be32 *xdr_decode_ftype3(__be32 *p, umode_t *mode)
+{
+ u32 type;
+
+ type = be32_to_cpup(p++);
if (type > NF3FIFO)
type = NF3NON;
- fmode = nfs_type2fmt[type];
- fattr->mode = (ntohl(*p++) & ~S_IFMT) | fmode;
- fattr->nlink = ntohl(*p++);
- fattr->uid = ntohl(*p++);
- fattr->gid = ntohl(*p++);
- p = xdr_decode_hyper(p, &fattr->size);
- p = xdr_decode_hyper(p, &fattr->du.nfs3.used);
-
- /* Turn remote device info into Linux-specific dev_t */
- major = ntohl(*p++);
- minor = ntohl(*p++);
- fattr->rdev = MKDEV(major, minor);
- if (MAJOR(fattr->rdev) != major || MINOR(fattr->rdev) != minor)
- fattr->rdev = 0;
+ *mode = nfs_type2fmt[type];
+ return p;
+}
- p = xdr_decode_hyper(p, &fattr->fsid.major);
- fattr->fsid.minor = 0;
- p = xdr_decode_hyper(p, &fattr->fileid);
- p = xdr_decode_time3(p, &fattr->atime);
- p = xdr_decode_time3(p, &fattr->mtime);
- p = xdr_decode_time3(p, &fattr->ctime);
+/*
+ * specdata3
+ *
+ * struct specdata3 {
+ * uint32 specdata1;
+ * uint32 specdata2;
+ * };
+ */
+static void encode_specdata3(struct xdr_stream *xdr, const dev_t rdev)
+{
+ __be32 *p;
- /* Update the mode bits */
- fattr->valid |= NFS_ATTR_FATTR_V3;
+ p = xdr_reserve_space(xdr, 8);
+ *p++ = cpu_to_be32(MAJOR(rdev));
+ *p = cpu_to_be32(MINOR(rdev));
+}
+
+static __be32 *xdr_decode_specdata3(__be32 *p, dev_t *rdev)
+{
+ unsigned int major, minor;
+
+ major = be32_to_cpup(p++);
+ minor = be32_to_cpup(p++);
+ *rdev = MKDEV(major, minor);
+ if (MAJOR(*rdev) != major || MINOR(*rdev) != minor)
+ *rdev = 0;
+ return p;
+}
+
+/*
+ * nfs_fh3
+ *
+ * struct nfs_fh3 {
+ * opaque data<NFS3_FHSIZE>;
+ * };
+ */
+static void encode_nfs_fh3(struct xdr_stream *xdr, const struct nfs_fh *fh)
+{
+ __be32 *p;
+
+ BUG_ON(fh->size > NFS3_FHSIZE);
+ p = xdr_reserve_space(xdr, 4 + fh->size);
+ xdr_encode_opaque(p, fh->data, fh->size);
+}
+
+static int decode_nfs_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
+{
+ u32 length;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ length = be32_to_cpup(p++);
+ if (unlikely(length > NFS3_FHSIZE))
+ goto out_toobig;
+ p = xdr_inline_decode(xdr, length);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ fh->size = length;
+ memcpy(fh->data, p, length);
+ return 0;
+out_toobig:
+ dprintk("NFS: file handle size (%u) too big\n", length);
+ return -E2BIG;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static void zero_nfs_fh3(struct nfs_fh *fh)
+{
+ memset(fh, 0, sizeof(*fh));
+}
+
+/*
+ * nfstime3
+ *
+ * struct nfstime3 {
+ * uint32 seconds;
+ * uint32 nseconds;
+ * };
+ */
+static __be32 *xdr_encode_nfstime3(__be32 *p, const struct timespec *timep)
+{
+ *p++ = cpu_to_be32(timep->tv_sec);
+ *p++ = cpu_to_be32(timep->tv_nsec);
return p;
}
-static inline __be32 *
-xdr_encode_sattr(__be32 *p, struct iattr *attr)
+static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep)
{
+ timep->tv_sec = be32_to_cpup(p++);
+ timep->tv_nsec = be32_to_cpup(p++);
+ return p;
+}
+
+/*
+ * sattr3
+ *
+ * enum time_how {
+ * DONT_CHANGE = 0,
+ * SET_TO_SERVER_TIME = 1,
+ * SET_TO_CLIENT_TIME = 2
+ * };
+ *
+ * union set_mode3 switch (bool set_it) {
+ * case TRUE:
+ * mode3 mode;
+ * default:
+ * void;
+ * };
+ *
+ * union set_uid3 switch (bool set_it) {
+ * case TRUE:
+ * uid3 uid;
+ * default:
+ * void;
+ * };
+ *
+ * union set_gid3 switch (bool set_it) {
+ * case TRUE:
+ * gid3 gid;
+ * default:
+ * void;
+ * };
+ *
+ * union set_size3 switch (bool set_it) {
+ * case TRUE:
+ * size3 size;
+ * default:
+ * void;
+ * };
+ *
+ * union set_atime switch (time_how set_it) {
+ * case SET_TO_CLIENT_TIME:
+ * nfstime3 atime;
+ * default:
+ * void;
+ * };
+ *
+ * union set_mtime switch (time_how set_it) {
+ * case SET_TO_CLIENT_TIME:
+ * nfstime3 mtime;
+ * default:
+ * void;
+ * };
+ *
+ * struct sattr3 {
+ * set_mode3 mode;
+ * set_uid3 uid;
+ * set_gid3 gid;
+ * set_size3 size;
+ * set_atime atime;
+ * set_mtime mtime;
+ * };
+ */
+static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
+{
+ u32 nbytes;
+ __be32 *p;
+
+ /*
+ * In order to make only a single xdr_reserve_space() call,
+ * pre-compute the total number of bytes to be reserved.
+ * Six boolean values, one for each set_foo field, are always
+ * present in the encoded result, so start there.
+ */
+ nbytes = 6 * 4;
+ if (attr->ia_valid & ATTR_MODE)
+ nbytes += 4;
+ if (attr->ia_valid & ATTR_UID)
+ nbytes += 4;
+ if (attr->ia_valid & ATTR_GID)
+ nbytes += 4;
+ if (attr->ia_valid & ATTR_SIZE)
+ nbytes += 8;
+ if (attr->ia_valid & ATTR_ATIME_SET)
+ nbytes += 8;
+ if (attr->ia_valid & ATTR_MTIME_SET)
+ nbytes += 8;
+ p = xdr_reserve_space(xdr, nbytes);
+
if (attr->ia_valid & ATTR_MODE) {
*p++ = xdr_one;
- *p++ = htonl(attr->ia_mode & S_IALLUGO);
- } else {
+ *p++ = cpu_to_be32(attr->ia_mode & S_IALLUGO);
+ } else
*p++ = xdr_zero;
- }
+
if (attr->ia_valid & ATTR_UID) {
*p++ = xdr_one;
- *p++ = htonl(attr->ia_uid);
- } else {
+ *p++ = cpu_to_be32(attr->ia_uid);
+ } else
*p++ = xdr_zero;
- }
+
if (attr->ia_valid & ATTR_GID) {
*p++ = xdr_one;
- *p++ = htonl(attr->ia_gid);
- } else {
+ *p++ = cpu_to_be32(attr->ia_gid);
+ } else
*p++ = xdr_zero;
- }
+
if (attr->ia_valid & ATTR_SIZE) {
*p++ = xdr_one;
- p = xdr_encode_hyper(p, (__u64) attr->ia_size);
- } else {
+ p = xdr_encode_hyper(p, (u64)attr->ia_size);
+ } else
*p++ = xdr_zero;
- }
+
if (attr->ia_valid & ATTR_ATIME_SET) {
*p++ = xdr_two;
- p = xdr_encode_time3(p, &attr->ia_atime);
+ p = xdr_encode_nfstime3(p, &attr->ia_atime);
} else if (attr->ia_valid & ATTR_ATIME) {
*p++ = xdr_one;
- } else {
+ } else
*p++ = xdr_zero;
- }
+
if (attr->ia_valid & ATTR_MTIME_SET) {
*p++ = xdr_two;
- p = xdr_encode_time3(p, &attr->ia_mtime);
+ xdr_encode_nfstime3(p, &attr->ia_mtime);
} else if (attr->ia_valid & ATTR_MTIME) {
- *p++ = xdr_one;
- } else {
- *p++ = xdr_zero;
- }
- return p;
+ *p = xdr_one;
+ } else
+ *p = xdr_zero;
}
-static inline __be32 *
-xdr_decode_wcc_attr(__be32 *p, struct nfs_fattr *fattr)
+/*
+ * fattr3
+ *
+ * struct fattr3 {
+ * ftype3 type;
+ * mode3 mode;
+ * uint32 nlink;
+ * uid3 uid;
+ * gid3 gid;
+ * size3 size;
+ * size3 used;
+ * specdata3 rdev;
+ * uint64 fsid;
+ * fileid3 fileid;
+ * nfstime3 atime;
+ * nfstime3 mtime;
+ * nfstime3 ctime;
+ * };
+ */
+static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
{
- p = xdr_decode_hyper(p, &fattr->pre_size);
- p = xdr_decode_time3(p, &fattr->pre_mtime);
- p = xdr_decode_time3(p, &fattr->pre_ctime);
+ umode_t fmode;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, NFS3_fattr_sz << 2);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+
+ p = xdr_decode_ftype3(p, &fmode);
+
+ fattr->mode = (be32_to_cpup(p++) & ~S_IFMT) | fmode;
+ fattr->nlink = be32_to_cpup(p++);
+ fattr->uid = be32_to_cpup(p++);
+ fattr->gid = be32_to_cpup(p++);
+
+ p = xdr_decode_size3(p, &fattr->size);
+ p = xdr_decode_size3(p, &fattr->du.nfs3.used);
+ p = xdr_decode_specdata3(p, &fattr->rdev);
+
+ p = xdr_decode_hyper(p, &fattr->fsid.major);
+ fattr->fsid.minor = 0;
+
+ p = xdr_decode_fileid3(p, &fattr->fileid);
+ p = xdr_decode_nfstime3(p, &fattr->atime);
+ p = xdr_decode_nfstime3(p, &fattr->mtime);
+ xdr_decode_nfstime3(p, &fattr->ctime);
+
+ fattr->valid |= NFS_ATTR_FATTR_V3;
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * post_op_attr
+ *
+ * union post_op_attr switch (bool attributes_follow) {
+ * case TRUE:
+ * fattr3 attributes;
+ * case FALSE:
+ * void;
+ * };
+ */
+static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ if (*p != xdr_zero)
+ return decode_fattr3(xdr, fattr);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * wcc_attr
+ * struct wcc_attr {
+ * size3 size;
+ * nfstime3 mtime;
+ * nfstime3 ctime;
+ * };
+ */
+static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, NFS3_wcc_attr_sz << 2);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+
fattr->valid |= NFS_ATTR_FATTR_PRESIZE
| NFS_ATTR_FATTR_PREMTIME
| NFS_ATTR_FATTR_PRECTIME;
- return p;
-}
-static inline __be32 *
-xdr_decode_post_op_attr(__be32 *p, struct nfs_fattr *fattr)
-{
- if (*p++)
- p = xdr_decode_fattr(p, fattr);
- return p;
+ p = xdr_decode_size3(p, &fattr->pre_size);
+ p = xdr_decode_nfstime3(p, &fattr->pre_mtime);
+ xdr_decode_nfstime3(p, &fattr->pre_ctime);
+
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
-static inline __be32 *
-xdr_decode_post_op_attr_stream(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+/*
+ * pre_op_attr
+ * union pre_op_attr switch (bool attributes_follow) {
+ * case TRUE:
+ * wcc_attr attributes;
+ * case FALSE:
+ * void;
+ * };
+ *
+ * wcc_data
+ *
+ * struct wcc_data {
+ * pre_op_attr before;
+ * post_op_attr after;
+ * };
+ */
+static int decode_pre_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
{
__be32 *p;
p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
+ if (unlikely(p == NULL))
goto out_overflow;
- if (ntohl(*p++)) {
- p = xdr_inline_decode(xdr, 84);
- if (unlikely(!p))
- goto out_overflow;
- p = xdr_decode_fattr(p, fattr);
- }
- return p;
+ if (*p != xdr_zero)
+ return decode_wcc_attr(xdr, fattr);
+ return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
- return ERR_PTR(-EIO);
+ return -EIO;
}
-static inline __be32 *
-xdr_decode_pre_op_attr(__be32 *p, struct nfs_fattr *fattr)
+static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr)
{
- if (*p++)
- return xdr_decode_wcc_attr(p, fattr);
- return p;
+ int error;
+
+ error = decode_pre_op_attr(xdr, fattr);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, fattr);
+out:
+ return error;
}
+/*
+ * post_op_fh3
+ *
+ * union post_op_fh3 switch (bool handle_follows) {
+ * case TRUE:
+ * nfs_fh3 handle;
+ * case FALSE:
+ * void;
+ * };
+ */
+static int decode_post_op_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
+{
+ __be32 *p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ if (*p != xdr_zero)
+ return decode_nfs_fh3(xdr, fh);
+ zero_nfs_fh3(fh);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
-static inline __be32 *
-xdr_decode_wcc_data(__be32 *p, struct nfs_fattr *fattr)
+/*
+ * diropargs3
+ *
+ * struct diropargs3 {
+ * nfs_fh3 dir;
+ * filename3 name;
+ * };
+ */
+static void encode_diropargs3(struct xdr_stream *xdr, const struct nfs_fh *fh,
+ const char *name, u32 length)
{
- p = xdr_decode_pre_op_attr(p, fattr);
- return xdr_decode_post_op_attr(p, fattr);
+ encode_nfs_fh3(xdr, fh);
+ encode_filename3(xdr, name, length);
}
+
/*
- * NFS encode functions
+ * NFSv3 XDR encode functions
+ *
+ * NFSv3 argument types are defined in section 3.3 of RFC 1813:
+ * "NFS Version 3 Protocol Specification".
*/
/*
- * Encode file handle argument
+ * 3.3.1 GETATTR3args
+ *
+ * struct GETATTR3args {
+ * nfs_fh3 object;
+ * };
*/
-static int
-nfs3_xdr_fhandle(struct rpc_rqst *req, __be32 *p, struct nfs_fh *fh)
+static void nfs3_xdr_enc_getattr3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_fh *fh)
{
- p = xdr_encode_fhandle(p, fh);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ encode_nfs_fh3(xdr, fh);
}
/*
- * Encode SETATTR arguments
+ * 3.3.2 SETATTR3args
+ *
+ * union sattrguard3 switch (bool check) {
+ * case TRUE:
+ * nfstime3 obj_ctime;
+ * case FALSE:
+ * void;
+ * };
+ *
+ * struct SETATTR3args {
+ * nfs_fh3 object;
+ * sattr3 new_attributes;
+ * sattrguard3 guard;
+ * };
*/
-static int
-nfs3_xdr_sattrargs(struct rpc_rqst *req, __be32 *p, struct nfs3_sattrargs *args)
-{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_sattr(p, args->sattr);
- *p++ = htonl(args->guard);
- if (args->guard)
- p = xdr_encode_time3(p, &args->guardtime);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+static void encode_sattrguard3(struct xdr_stream *xdr,
+ const struct nfs3_sattrargs *args)
+{
+ __be32 *p;
+
+ if (args->guard) {
+ p = xdr_reserve_space(xdr, 4 + 8);
+ *p++ = xdr_one;
+ xdr_encode_nfstime3(p, &args->guardtime);
+ } else {
+ p = xdr_reserve_space(xdr, 4);
+ *p = xdr_zero;
+ }
+}
+
+static void nfs3_xdr_enc_setattr3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_sattrargs *args)
+{
+ encode_nfs_fh3(xdr, args->fh);
+ encode_sattr3(xdr, args->sattr);
+ encode_sattrguard3(xdr, args);
}
/*
- * Encode directory ops argument
+ * 3.3.3 LOOKUP3args
+ *
+ * struct LOOKUP3args {
+ * diropargs3 what;
+ * };
*/
-static int
-nfs3_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs3_diropargs *args)
+static void nfs3_xdr_enc_lookup3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_diropargs *args)
{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_array(p, args->name, args->len);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ encode_diropargs3(xdr, args->fh, args->name, args->len);
}
/*
- * Encode REMOVE argument
+ * 3.3.4 ACCESS3args
+ *
+ * struct ACCESS3args {
+ * nfs_fh3 object;
+ * uint32 access;
+ * };
*/
-static int
-nfs3_xdr_removeargs(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args)
+static void encode_access3args(struct xdr_stream *xdr,
+ const struct nfs3_accessargs *args)
{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_array(p, args->name.name, args->name.len);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ encode_nfs_fh3(xdr, args->fh);
+ encode_uint32(xdr, args->access);
+}
+
+static void nfs3_xdr_enc_access3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_accessargs *args)
+{
+ encode_access3args(xdr, args);
}
/*
- * Encode access() argument
+ * 3.3.5 READLINK3args
+ *
+ * struct READLINK3args {
+ * nfs_fh3 symlink;
+ * };
*/
-static int
-nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *args)
+static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_readlinkargs *args)
{
- p = xdr_encode_fhandle(p, args->fh);
- *p++ = htonl(args->access);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ encode_nfs_fh3(xdr, args->fh);
+ prepare_reply_buffer(req, args->pages, args->pgbase,
+ args->pglen, NFS3_readlinkres_sz);
}
/*
- * Arguments to a READ call. Since we read data directly into the page
- * cache, we also set up the reply iovec here so that iov[1] points
- * exactly to the page we want to fetch.
+ * 3.3.6 READ3args
+ *
+ * struct READ3args {
+ * nfs_fh3 file;
+ * offset3 offset;
+ * count3 count;
+ * };
*/
-static int
-nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
+static void encode_read3args(struct xdr_stream *xdr,
+ const struct nfs_readargs *args)
{
- struct rpc_auth *auth = req->rq_cred->cr_auth;
- unsigned int replen;
- u32 count = args->count;
+ __be32 *p;
+
+ encode_nfs_fh3(xdr, args->fh);
- p = xdr_encode_fhandle(p, args->fh);
+ p = xdr_reserve_space(xdr, 8 + 4);
p = xdr_encode_hyper(p, args->offset);
- *p++ = htonl(count);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+ *p = cpu_to_be32(args->count);
+}
- /* Inline the page array */
- replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2;
- xdr_inline_pages(&req->rq_rcv_buf, replen,
- args->pages, args->pgbase, count);
+static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_readargs *args)
+{
+ encode_read3args(xdr, args);
+ prepare_reply_buffer(req, args->pages, args->pgbase,
+ args->count, NFS3_readres_sz);
req->rq_rcv_buf.flags |= XDRBUF_READ;
- return 0;
}
/*
- * Write arguments. Splice the buffer to be written into the iovec.
+ * 3.3.7 WRITE3args
+ *
+ * enum stable_how {
+ * UNSTABLE = 0,
+ * DATA_SYNC = 1,
+ * FILE_SYNC = 2
+ * };
+ *
+ * struct WRITE3args {
+ * nfs_fh3 file;
+ * offset3 offset;
+ * count3 count;
+ * stable_how stable;
+ * opaque data<>;
+ * };
*/
-static int
-nfs3_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
+static void encode_write3args(struct xdr_stream *xdr,
+ const struct nfs_writeargs *args)
{
- struct xdr_buf *sndbuf = &req->rq_snd_buf;
- u32 count = args->count;
+ __be32 *p;
+
+ encode_nfs_fh3(xdr, args->fh);
- p = xdr_encode_fhandle(p, args->fh);
+ p = xdr_reserve_space(xdr, 8 + 4 + 4 + 4);
p = xdr_encode_hyper(p, args->offset);
- *p++ = htonl(count);
- *p++ = htonl(args->stable);
- *p++ = htonl(count);
- sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
-
- /* Copy the page array */
- xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
- sndbuf->flags |= XDRBUF_WRITE;
- return 0;
+ *p++ = cpu_to_be32(args->count);
+ *p++ = cpu_to_be32(args->stable);
+ *p = cpu_to_be32(args->count);
+ xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
+}
+
+static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_writeargs *args)
+{
+ encode_write3args(xdr, args);
+ xdr->buf->flags |= XDRBUF_WRITE;
}
/*
- * Encode CREATE arguments
+ * 3.3.8 CREATE3args
+ *
+ * enum createmode3 {
+ * UNCHECKED = 0,
+ * GUARDED = 1,
+ * EXCLUSIVE = 2
+ * };
+ *
+ * union createhow3 switch (createmode3 mode) {
+ * case UNCHECKED:
+ * case GUARDED:
+ * sattr3 obj_attributes;
+ * case EXCLUSIVE:
+ * createverf3 verf;
+ * };
+ *
+ * struct CREATE3args {
+ * diropargs3 where;
+ * createhow3 how;
+ * };
*/
-static int
-nfs3_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs3_createargs *args)
+static void encode_createhow3(struct xdr_stream *xdr,
+ const struct nfs3_createargs *args)
{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_array(p, args->name, args->len);
-
- *p++ = htonl(args->createmode);
- if (args->createmode == NFS3_CREATE_EXCLUSIVE) {
- *p++ = args->verifier[0];
- *p++ = args->verifier[1];
- } else
- p = xdr_encode_sattr(p, args->sattr);
+ encode_uint32(xdr, args->createmode);
+ switch (args->createmode) {
+ case NFS3_CREATE_UNCHECKED:
+ case NFS3_CREATE_GUARDED:
+ encode_sattr3(xdr, args->sattr);
+ break;
+ case NFS3_CREATE_EXCLUSIVE:
+ encode_createverf3(xdr, args->verifier);
+ break;
+ default:
+ BUG();
+ }
+}
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+static void nfs3_xdr_enc_create3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_createargs *args)
+{
+ encode_diropargs3(xdr, args->fh, args->name, args->len);
+ encode_createhow3(xdr, args);
}
/*
- * Encode MKDIR arguments
+ * 3.3.9 MKDIR3args
+ *
+ * struct MKDIR3args {
+ * diropargs3 where;
+ * sattr3 attributes;
+ * };
*/
-static int
-nfs3_xdr_mkdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mkdirargs *args)
+static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_mkdirargs *args)
{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_array(p, args->name, args->len);
- p = xdr_encode_sattr(p, args->sattr);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ encode_diropargs3(xdr, args->fh, args->name, args->len);
+ encode_sattr3(xdr, args->sattr);
}
/*
- * Encode SYMLINK arguments
+ * 3.3.10 SYMLINK3args
+ *
+ * struct symlinkdata3 {
+ * sattr3 symlink_attributes;
+ * nfspath3 symlink_data;
+ * };
+ *
+ * struct SYMLINK3args {
+ * diropargs3 where;
+ * symlinkdata3 symlink;
+ * };
*/
-static int
-nfs3_xdr_symlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_symlinkargs *args)
+static void encode_symlinkdata3(struct xdr_stream *xdr,
+ const struct nfs3_symlinkargs *args)
{
- p = xdr_encode_fhandle(p, args->fromfh);
- p = xdr_encode_array(p, args->fromname, args->fromlen);
- p = xdr_encode_sattr(p, args->sattr);
- *p++ = htonl(args->pathlen);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+ encode_sattr3(xdr, args->sattr);
+ encode_nfspath3(xdr, args->pages, args->pathlen);
+}
- /* Copy the page */
- xdr_encode_pages(&req->rq_snd_buf, args->pages, 0, args->pathlen);
- return 0;
+static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_symlinkargs *args)
+{
+ encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
+ encode_symlinkdata3(xdr, args);
}
/*
- * Encode MKNOD arguments
+ * 3.3.11 MKNOD3args
+ *
+ * struct devicedata3 {
+ * sattr3 dev_attributes;
+ * specdata3 spec;
+ * };
+ *
+ * union mknoddata3 switch (ftype3 type) {
+ * case NF3CHR:
+ * case NF3BLK:
+ * devicedata3 device;
+ * case NF3SOCK:
+ * case NF3FIFO:
+ * sattr3 pipe_attributes;
+ * default:
+ * void;
+ * };
+ *
+ * struct MKNOD3args {
+ * diropargs3 where;
+ * mknoddata3 what;
+ * };
*/
-static int
-nfs3_xdr_mknodargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mknodargs *args)
-{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_array(p, args->name, args->len);
- *p++ = htonl(args->type);
- p = xdr_encode_sattr(p, args->sattr);
- if (args->type == NF3CHR || args->type == NF3BLK) {
- *p++ = htonl(MAJOR(args->rdev));
- *p++ = htonl(MINOR(args->rdev));
+static void encode_devicedata3(struct xdr_stream *xdr,
+ const struct nfs3_mknodargs *args)
+{
+ encode_sattr3(xdr, args->sattr);
+ encode_specdata3(xdr, args->rdev);
+}
+
+static void encode_mknoddata3(struct xdr_stream *xdr,
+ const struct nfs3_mknodargs *args)
+{
+ encode_ftype3(xdr, args->type);
+ switch (args->type) {
+ case NF3CHR:
+ case NF3BLK:
+ encode_devicedata3(xdr, args);
+ break;
+ case NF3SOCK:
+ case NF3FIFO:
+ encode_sattr3(xdr, args->sattr);
+ break;
+ case NF3REG:
+ case NF3DIR:
+ break;
+ default:
+ BUG();
}
+}
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+static void nfs3_xdr_enc_mknod3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_mknodargs *args)
+{
+ encode_diropargs3(xdr, args->fh, args->name, args->len);
+ encode_mknoddata3(xdr, args);
}
/*
- * Encode RENAME arguments
+ * 3.3.12 REMOVE3args
+ *
+ * struct REMOVE3args {
+ * diropargs3 object;
+ * };
*/
-static int
-nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args)
-{
- p = xdr_encode_fhandle(p, args->old_dir);
- p = xdr_encode_array(p, args->old_name->name, args->old_name->len);
- p = xdr_encode_fhandle(p, args->new_dir);
- p = xdr_encode_array(p, args->new_name->name, args->new_name->len);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+static void nfs3_xdr_enc_remove3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_removeargs *args)
+{
+ encode_diropargs3(xdr, args->fh, args->name.name, args->name.len);
}
/*
- * Encode LINK arguments
+ * 3.3.14 RENAME3args
+ *
+ * struct RENAME3args {
+ * diropargs3 from;
+ * diropargs3 to;
+ * };
*/
-static int
-nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args)
+static void nfs3_xdr_enc_rename3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_renameargs *args)
{
- p = xdr_encode_fhandle(p, args->fromfh);
- p = xdr_encode_fhandle(p, args->tofh);
- p = xdr_encode_array(p, args->toname, args->tolen);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ const struct qstr *old = args->old_name;
+ const struct qstr *new = args->new_name;
+
+ encode_diropargs3(xdr, args->old_dir, old->name, old->len);
+ encode_diropargs3(xdr, args->new_dir, new->name, new->len);
}
/*
- * Encode arguments to readdir call
+ * 3.3.15 LINK3args
+ *
+ * struct LINK3args {
+ * nfs_fh3 file;
+ * diropargs3 link;
+ * };
*/
-static int
-nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args)
+static void nfs3_xdr_enc_link3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_linkargs *args)
{
- struct rpc_auth *auth = req->rq_cred->cr_auth;
- unsigned int replen;
- u32 count = args->count;
-
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_hyper(p, args->cookie);
- *p++ = args->verf[0];
- *p++ = args->verf[1];
- if (args->plus) {
- /* readdirplus: need dircount + buffer size.
- * We just make sure we make dircount big enough */
- *p++ = htonl(count >> 3);
- }
- *p++ = htonl(count);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-
- /* Inline the page array */
- replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readdirres_sz) << 2;
- xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count);
- return 0;
+ encode_nfs_fh3(xdr, args->fromfh);
+ encode_diropargs3(xdr, args->tofh, args->toname, args->tolen);
}
/*
- * Decode the result of a readdir call.
- * We just check for syntactical correctness.
+ * 3.3.16 READDIR3args
+ *
+ * struct READDIR3args {
+ * nfs_fh3 dir;
+ * cookie3 cookie;
+ * cookieverf3 cookieverf;
+ * count3 count;
+ * };
*/
-static int
-nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res)
+static void encode_readdir3args(struct xdr_stream *xdr,
+ const struct nfs3_readdirargs *args)
{
- struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
- struct kvec *iov = rcvbuf->head;
- struct page **page;
- size_t hdrlen;
- u32 recvd, pglen;
- int status, nr = 0;
-
- status = ntohl(*p++);
- /* Decode post_op_attrs */
- p = xdr_decode_post_op_attr(p, res->dir_attr);
- if (status)
- return nfs_stat_to_errno(status);
- /* Decode verifier cookie */
- if (res->verf) {
- res->verf[0] = *p++;
- res->verf[1] = *p++;
- } else {
- p += 2;
- }
+ __be32 *p;
- hdrlen = (u8 *) p - (u8 *) iov->iov_base;
- if (iov->iov_len < hdrlen) {
- dprintk("NFS: READDIR reply header overflowed:"
- "length %Zu > %Zu\n", hdrlen, iov->iov_len);
- return -errno_NFSERR_IO;
- } else if (iov->iov_len != hdrlen) {
- dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
- xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
- }
+ encode_nfs_fh3(xdr, args->fh);
- pglen = rcvbuf->page_len;
- recvd = rcvbuf->len - hdrlen;
- if (pglen > recvd)
- pglen = recvd;
- page = rcvbuf->pages;
+ p = xdr_reserve_space(xdr, 8 + NFS3_COOKIEVERFSIZE + 4);
+ p = xdr_encode_cookie3(p, args->cookie);
+ p = xdr_encode_cookieverf3(p, args->verf);
+ *p = cpu_to_be32(args->count);
+}
- return nr;
+static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_readdirargs *args)
+{
+ encode_readdir3args(xdr, args);
+ prepare_reply_buffer(req, args->pages, 0,
+ args->count, NFS3_readdirres_sz);
}
-__be32 *
-nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus)
+/*
+ * 3.3.17 READDIRPLUS3args
+ *
+ * struct READDIRPLUS3args {
+ * nfs_fh3 dir;
+ * cookie3 cookie;
+ * cookieverf3 cookieverf;
+ * count3 dircount;
+ * count3 maxcount;
+ * };
+ */
+static void encode_readdirplus3args(struct xdr_stream *xdr,
+ const struct nfs3_readdirargs *args)
{
__be32 *p;
- struct nfs_entry old = *entry;
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- goto out_overflow;
- if (!ntohl(*p++)) {
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- goto out_overflow;
- if (!ntohl(*p++))
- return ERR_PTR(-EAGAIN);
- entry->eof = 1;
- return ERR_PTR(-EBADCOOKIE);
- }
+ encode_nfs_fh3(xdr, args->fh);
- p = xdr_inline_decode(xdr, 12);
- if (unlikely(!p))
- goto out_overflow;
- p = xdr_decode_hyper(p, &entry->ino);
- entry->len = ntohl(*p++);
-
- p = xdr_inline_decode(xdr, entry->len + 8);
- if (unlikely(!p))
- goto out_overflow;
- entry->name = (const char *) p;
- p += XDR_QUADLEN(entry->len);
- entry->prev_cookie = entry->cookie;
- p = xdr_decode_hyper(p, &entry->cookie);
-
- if (plus) {
- entry->fattr->valid = 0;
- p = xdr_decode_post_op_attr_stream(xdr, entry->fattr);
- if (IS_ERR(p))
- goto out_overflow_exit;
- /* In fact, a post_op_fh3: */
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- goto out_overflow;
- if (*p++) {
- p = xdr_decode_fhandle_stream(xdr, entry->fh);
- if (IS_ERR(p))
- goto out_overflow_exit;
- /* Ugh -- server reply was truncated */
- if (p == NULL) {
- dprintk("NFS: FH truncated\n");
- *entry = old;
- return ERR_PTR(-EAGAIN);
- }
- } else
- memset((u8*)(entry->fh), 0, sizeof(*entry->fh));
- }
+ p = xdr_reserve_space(xdr, 8 + NFS3_COOKIEVERFSIZE + 4 + 4);
+ p = xdr_encode_cookie3(p, args->cookie);
+ p = xdr_encode_cookieverf3(p, args->verf);
- p = xdr_inline_peek(xdr, 8);
- if (p != NULL)
- entry->eof = !p[0] && p[1];
- else
- entry->eof = 0;
+ /*
+ * readdirplus: need dircount + buffer size.
+ * We just make sure we make dircount big enough
+ */
+ *p++ = cpu_to_be32(args->count >> 3);
- return p;
+ *p = cpu_to_be32(args->count);
+}
-out_overflow:
- print_overflow_msg(__func__, xdr);
-out_overflow_exit:
- return ERR_PTR(-EIO);
+static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_readdirargs *args)
+{
+ encode_readdirplus3args(xdr, args);
+ prepare_reply_buffer(req, args->pages, 0,
+ args->count, NFS3_readdirres_sz);
}
/*
- * Encode COMMIT arguments
+ * 3.3.21 COMMIT3args
+ *
+ * struct COMMIT3args {
+ * nfs_fh3 file;
+ * offset3 offset;
+ * count3 count;
+ * };
*/
-static int
-nfs3_xdr_commitargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
+static void encode_commit3args(struct xdr_stream *xdr,
+ const struct nfs_writeargs *args)
{
- p = xdr_encode_fhandle(p, args->fh);
+ __be32 *p;
+
+ encode_nfs_fh3(xdr, args->fh);
+
+ p = xdr_reserve_space(xdr, 8 + 4);
p = xdr_encode_hyper(p, args->offset);
- *p++ = htonl(args->count);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ *p = cpu_to_be32(args->count);
}
-#ifdef CONFIG_NFS_V3_ACL
-/*
- * Encode GETACL arguments
- */
-static int
-nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p,
- struct nfs3_getaclargs *args)
+static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_writeargs *args)
{
- struct rpc_auth *auth = req->rq_cred->cr_auth;
- unsigned int replen;
+ encode_commit3args(xdr, args);
+}
- p = xdr_encode_fhandle(p, args->fh);
- *p++ = htonl(args->mask);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+#ifdef CONFIG_NFS_V3_ACL
- if (args->mask & (NFS_ACL | NFS_DFACL)) {
- /* Inline the page array */
- replen = (RPC_REPHDRSIZE + auth->au_rslack +
- ACL3_getaclres_sz) << 2;
- xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0,
- NFSACL_MAXPAGES << PAGE_SHIFT);
- }
- return 0;
+static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_getaclargs *args)
+{
+ encode_nfs_fh3(xdr, args->fh);
+ encode_uint32(xdr, args->mask);
+ if (args->mask & (NFS_ACL | NFS_DFACL))
+ prepare_reply_buffer(req, args->pages, 0,
+ NFSACL_MAXPAGES << PAGE_SHIFT,
+ ACL3_getaclres_sz);
}
-/*
- * Encode SETACL arguments
- */
-static int
-nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p,
- struct nfs3_setaclargs *args)
+static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_setaclargs *args)
{
- struct xdr_buf *buf = &req->rq_snd_buf;
unsigned int base;
- int err;
-
- p = xdr_encode_fhandle(p, NFS_FH(args->inode));
- *p++ = htonl(args->mask);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- base = req->rq_slen;
+ int error;
+ encode_nfs_fh3(xdr, NFS_FH(args->inode));
+ encode_uint32(xdr, args->mask);
if (args->npages != 0)
- xdr_encode_pages(buf, args->pages, 0, args->len);
- else
- req->rq_slen = xdr_adjust_iovec(req->rq_svec,
- p + XDR_QUADLEN(args->len));
+ xdr_write_pages(xdr, args->pages, 0, args->len);
- err = nfsacl_encode(buf, base, args->inode,
+ base = req->rq_slen;
+ error = nfsacl_encode(xdr->buf, base, args->inode,
(args->mask & NFS_ACL) ?
args->acl_access : NULL, 1, 0);
- if (err > 0)
- err = nfsacl_encode(buf, base + err, args->inode,
- (args->mask & NFS_DFACL) ?
- args->acl_default : NULL, 1,
- NFS_ACL_DEFAULT);
- return (err > 0) ? 0 : err;
+ BUG_ON(error < 0);
+ error = nfsacl_encode(xdr->buf, base + error, args->inode,
+ (args->mask & NFS_DFACL) ?
+ args->acl_default : NULL, 1,
+ NFS_ACL_DEFAULT);
+ BUG_ON(error < 0);
}
+
#endif /* CONFIG_NFS_V3_ACL */
/*
- * NFS XDR decode functions
+ * NFSv3 XDR decode functions
+ *
+ * NFSv3 result types are defined in section 3.3 of RFC 1813:
+ * "NFS Version 3 Protocol Specification".
*/
/*
- * Decode attrstat reply.
+ * 3.3.1 GETATTR3res
+ *
+ * struct GETATTR3resok {
+ * fattr3 obj_attributes;
+ * };
+ *
+ * union GETATTR3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * GETATTR3resok resok;
+ * default:
+ * void;
+ * };
*/
-static int
-nfs3_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
+static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_fattr *result)
{
- int status;
-
- if ((status = ntohl(*p++)))
- return nfs_stat_to_errno(status);
- xdr_decode_fattr(p, fattr);
- return 0;
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_default;
+ error = decode_fattr3(xdr, result);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
}
/*
- * Decode status+wcc_data reply
- * SATTR, REMOVE, RMDIR
+ * 3.3.2 SETATTR3res
+ *
+ * struct SETATTR3resok {
+ * wcc_data obj_wcc;
+ * };
+ *
+ * struct SETATTR3resfail {
+ * wcc_data obj_wcc;
+ * };
+ *
+ * union SETATTR3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * SETATTR3resok resok;
+ * default:
+ * SETATTR3resfail resfail;
+ * };
*/
-static int
-nfs3_xdr_wccstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
+static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_fattr *result)
{
- int status;
-
- if ((status = ntohl(*p++)))
- status = nfs_stat_to_errno(status);
- xdr_decode_wcc_data(p, fattr);
- return status;
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_wcc_data(xdr, result);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
}
-static int
-nfs3_xdr_removeres(struct rpc_rqst *req, __be32 *p, struct nfs_removeres *res)
+/*
+ * 3.3.3 LOOKUP3res
+ *
+ * struct LOOKUP3resok {
+ * nfs_fh3 object;
+ * post_op_attr obj_attributes;
+ * post_op_attr dir_attributes;
+ * };
+ *
+ * struct LOOKUP3resfail {
+ * post_op_attr dir_attributes;
+ * };
+ *
+ * union LOOKUP3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * LOOKUP3resok resok;
+ * default:
+ * LOOKUP3resfail resfail;
+ * };
+ */
+static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs3_diropres *result)
{
- return nfs3_xdr_wccstat(req, p, res->dir_attr);
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_default;
+ error = decode_nfs_fh3(xdr, result->fh);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result->dir_attr);
+out:
+ return error;
+out_default:
+ error = decode_post_op_attr(xdr, result->dir_attr);
+ if (unlikely(error))
+ goto out;
+ return nfs_stat_to_errno(status);
}
/*
- * Decode LOOKUP reply
+ * 3.3.4 ACCESS3res
+ *
+ * struct ACCESS3resok {
+ * post_op_attr obj_attributes;
+ * uint32 access;
+ * };
+ *
+ * struct ACCESS3resfail {
+ * post_op_attr obj_attributes;
+ * };
+ *
+ * union ACCESS3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * ACCESS3resok resok;
+ * default:
+ * ACCESS3resfail resfail;
+ * };
*/
-static int
-nfs3_xdr_lookupres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res)
+static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs3_accessres *result)
{
- int status;
-
- if ((status = ntohl(*p++))) {
- status = nfs_stat_to_errno(status);
- } else {
- if (!(p = xdr_decode_fhandle(p, res->fh)))
- return -errno_NFSERR_IO;
- p = xdr_decode_post_op_attr(p, res->fattr);
- }
- xdr_decode_post_op_attr(p, res->dir_attr);
- return status;
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_default;
+ error = decode_uint32(xdr, &result->access);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
}
/*
- * Decode ACCESS reply
+ * 3.3.5 READLINK3res
+ *
+ * struct READLINK3resok {
+ * post_op_attr symlink_attributes;
+ * nfspath3 data;
+ * };
+ *
+ * struct READLINK3resfail {
+ * post_op_attr symlink_attributes;
+ * };
+ *
+ * union READLINK3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * READLINK3resok resok;
+ * default:
+ * READLINK3resfail resfail;
+ * };
*/
-static int
-nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res)
+static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_fattr *result)
{
- int status = ntohl(*p++);
-
- p = xdr_decode_post_op_attr(p, res->fattr);
- if (status)
- return nfs_stat_to_errno(status);
- res->access = ntohl(*p++);
- return 0;
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_default;
+ error = decode_nfspath3(xdr);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
}
-static int
-nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args)
+/*
+ * 3.3.6 READ3res
+ *
+ * struct READ3resok {
+ * post_op_attr file_attributes;
+ * count3 count;
+ * bool eof;
+ * opaque data<>;
+ * };
+ *
+ * struct READ3resfail {
+ * post_op_attr file_attributes;
+ * };
+ *
+ * union READ3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * READ3resok resok;
+ * default:
+ * READ3resfail resfail;
+ * };
+ */
+static int decode_read3resok(struct xdr_stream *xdr,
+ struct nfs_readres *result)
{
- struct rpc_auth *auth = req->rq_cred->cr_auth;
- unsigned int replen;
+ u32 eof, count, ocount, recvd;
+ size_t hdrlen;
+ __be32 *p;
- p = xdr_encode_fhandle(p, args->fh);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+ p = xdr_inline_decode(xdr, 4 + 4 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ count = be32_to_cpup(p++);
+ eof = be32_to_cpup(p++);
+ ocount = be32_to_cpup(p++);
+ if (unlikely(ocount != count))
+ goto out_mismatch;
+ hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+ recvd = xdr->buf->len - hdrlen;
+ if (unlikely(count > recvd))
+ goto out_cheating;
+
+out:
+ xdr_read_pages(xdr, count);
+ result->eof = eof;
+ result->count = count;
+ return count;
+out_mismatch:
+ dprintk("NFS: READ count doesn't match length of opaque: "
+ "count %u != ocount %u\n", count, ocount);
+ return -EIO;
+out_cheating:
+ dprintk("NFS: server cheating in read result: "
+ "count %u > recvd %u\n", count, recvd);
+ count = recvd;
+ eof = 0;
+ goto out;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
- /* Inline the page array */
- replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readlinkres_sz) << 2;
- xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen);
- return 0;
+static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_readres *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+ error = decode_read3resok(xdr, result);
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
}
/*
- * Decode READLINK reply
+ * 3.3.7 WRITE3res
+ *
+ * enum stable_how {
+ * UNSTABLE = 0,
+ * DATA_SYNC = 1,
+ * FILE_SYNC = 2
+ * };
+ *
+ * struct WRITE3resok {
+ * wcc_data file_wcc;
+ * count3 count;
+ * stable_how committed;
+ * writeverf3 verf;
+ * };
+ *
+ * struct WRITE3resfail {
+ * wcc_data file_wcc;
+ * };
+ *
+ * union WRITE3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * WRITE3resok resok;
+ * default:
+ * WRITE3resfail resfail;
+ * };
*/
-static int
-nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
+static int decode_write3resok(struct xdr_stream *xdr,
+ struct nfs_writeres *result)
{
- struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
- struct kvec *iov = rcvbuf->head;
- size_t hdrlen;
- u32 len, recvd;
- int status;
-
- status = ntohl(*p++);
- p = xdr_decode_post_op_attr(p, fattr);
-
- if (status != 0)
- return nfs_stat_to_errno(status);
-
- /* Convert length of symlink */
- len = ntohl(*p++);
- if (len >= rcvbuf->page_len) {
- dprintk("nfs: server returned giant symlink!\n");
- return -ENAMETOOLONG;
- }
+ __be32 *p;
- hdrlen = (u8 *) p - (u8 *) iov->iov_base;
- if (iov->iov_len < hdrlen) {
- dprintk("NFS: READLINK reply header overflowed:"
- "length %Zu > %Zu\n", hdrlen, iov->iov_len);
- return -errno_NFSERR_IO;
- } else if (iov->iov_len != hdrlen) {
- dprintk("NFS: READLINK header is short. "
- "iovec will be shifted.\n");
- xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
- }
- recvd = req->rq_rcv_buf.len - hdrlen;
- if (recvd < len) {
- dprintk("NFS: server cheating in readlink reply: "
- "count %u > recvd %u\n", len, recvd);
- return -EIO;
- }
+ p = xdr_inline_decode(xdr, 4 + 4 + NFS3_WRITEVERFSIZE);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ result->count = be32_to_cpup(p++);
+ result->verf->committed = be32_to_cpup(p++);
+ if (unlikely(result->verf->committed > NFS_FILE_SYNC))
+ goto out_badvalue;
+ memcpy(result->verf->verifier, p, NFS3_WRITEVERFSIZE);
+ return result->count;
+out_badvalue:
+ dprintk("NFS: bad stable_how value: %u\n", result->verf->committed);
+ return -EIO;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
- xdr_terminate_string(rcvbuf, len);
- return 0;
+static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_writeres *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_wcc_data(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+ error = decode_write3resok(xdr, result);
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
}
/*
- * Decode READ reply
+ * 3.3.8 CREATE3res
+ *
+ * struct CREATE3resok {
+ * post_op_fh3 obj;
+ * post_op_attr obj_attributes;
+ * wcc_data dir_wcc;
+ * };
+ *
+ * struct CREATE3resfail {
+ * wcc_data dir_wcc;
+ * };
+ *
+ * union CREATE3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * CREATE3resok resok;
+ * default:
+ * CREATE3resfail resfail;
+ * };
*/
-static int
-nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
+static int decode_create3resok(struct xdr_stream *xdr,
+ struct nfs3_diropres *result)
{
- struct kvec *iov = req->rq_rcv_buf.head;
- size_t hdrlen;
- u32 count, ocount, recvd;
- int status;
+ int error;
+
+ error = decode_post_op_fh3(xdr, result->fh);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ /* The server isn't required to return a file handle.
+ * If it didn't, force the client to perform a LOOKUP
+ * to determine the correct file handle and attribute
+ * values for the new object. */
+ if (result->fh->size == 0)
+ result->fattr->valid = 0;
+ error = decode_wcc_data(xdr, result->dir_attr);
+out:
+ return error;
+}
- status = ntohl(*p++);
- p = xdr_decode_post_op_attr(p, res->fattr);
+static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs3_diropres *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_default;
+ error = decode_create3resok(xdr, result);
+out:
+ return error;
+out_default:
+ error = decode_wcc_data(xdr, result->dir_attr);
+ if (unlikely(error))
+ goto out;
+ return nfs_stat_to_errno(status);
+}
- if (status != 0)
- return nfs_stat_to_errno(status);
+/*
+ * 3.3.12 REMOVE3res
+ *
+ * struct REMOVE3resok {
+ * wcc_data dir_wcc;
+ * };
+ *
+ * struct REMOVE3resfail {
+ * wcc_data dir_wcc;
+ * };
+ *
+ * union REMOVE3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * REMOVE3resok resok;
+ * default:
+ * REMOVE3resfail resfail;
+ * };
+ */
+static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_removeres *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_wcc_data(xdr, result->dir_attr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
+}
- /* Decode reply count and EOF flag. NFSv3 is somewhat redundant
- * in that it puts the count both in the res struct and in the
- * opaque data count. */
- count = ntohl(*p++);
- res->eof = ntohl(*p++);
- ocount = ntohl(*p++);
+/*
+ * 3.3.14 RENAME3res
+ *
+ * struct RENAME3resok {
+ * wcc_data fromdir_wcc;
+ * wcc_data todir_wcc;
+ * };
+ *
+ * struct RENAME3resfail {
+ * wcc_data fromdir_wcc;
+ * wcc_data todir_wcc;
+ * };
+ *
+ * union RENAME3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * RENAME3resok resok;
+ * default:
+ * RENAME3resfail resfail;
+ * };
+ */
+static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_renameres *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_wcc_data(xdr, result->old_fattr);
+ if (unlikely(error))
+ goto out;
+ error = decode_wcc_data(xdr, result->new_fattr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
+}
- if (ocount != count) {
- dprintk("NFS: READ count doesn't match RPC opaque count.\n");
- return -errno_NFSERR_IO;
- }
+/*
+ * 3.3.15 LINK3res
+ *
+ * struct LINK3resok {
+ * post_op_attr file_attributes;
+ * wcc_data linkdir_wcc;
+ * };
+ *
+ * struct LINK3resfail {
+ * post_op_attr file_attributes;
+ * wcc_data linkdir_wcc;
+ * };
+ *
+ * union LINK3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * LINK3resok resok;
+ * default:
+ * LINK3resfail resfail;
+ * };
+ */
+static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs3_linkres *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ error = decode_wcc_data(xdr, result->dir_attr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
+}
- hdrlen = (u8 *) p - (u8 *) iov->iov_base;
- if (iov->iov_len < hdrlen) {
- dprintk("NFS: READ reply header overflowed:"
- "length %Zu > %Zu\n", hdrlen, iov->iov_len);
- return -errno_NFSERR_IO;
- } else if (iov->iov_len != hdrlen) {
- dprintk("NFS: READ header is short. iovec will be shifted.\n");
- xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen);
- }
+/**
+ * nfs3_decode_dirent - Decode a single NFSv3 directory entry stored in
+ * the local page cache
+ * @xdr: XDR stream where entry resides
+ * @entry: buffer to fill in with entry data
+ * @plus: boolean indicating whether this should be a readdirplus entry
+ *
+ * Returns zero if successful, otherwise a negative errno value is
+ * returned.
+ *
+ * This function is not invoked during READDIR reply decoding, but
+ * rather whenever an application invokes the getdents(2) system call
+ * on a directory already in our cache.
+ *
+ * 3.3.16 entry3
+ *
+ * struct entry3 {
+ * fileid3 fileid;
+ * filename3 name;
+ * cookie3 cookie;
+ * fhandle3 filehandle;
+ * post_op_attr3 attributes;
+ * entry3 *nextentry;
+ * };
+ *
+ * 3.3.17 entryplus3
+ * struct entryplus3 {
+ * fileid3 fileid;
+ * filename3 name;
+ * cookie3 cookie;
+ * post_op_attr name_attributes;
+ * post_op_fh3 name_handle;
+ * entryplus3 *nextentry;
+ * };
+ */
+int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
+ int plus)
+{
+ struct nfs_entry old = *entry;
+ __be32 *p;
+ int error;
- recvd = req->rq_rcv_buf.len - hdrlen;
- if (count > recvd) {
- dprintk("NFS: server cheating in read reply: "
- "count %u > recvd %u\n", count, recvd);
- count = recvd;
- res->eof = 0;
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ if (*p == xdr_zero) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ if (*p == xdr_zero)
+ return -EAGAIN;
+ entry->eof = 1;
+ return -EBADCOOKIE;
}
- if (count < res->count)
- res->count = count;
+ error = decode_fileid3(xdr, &entry->ino);
+ if (unlikely(error))
+ return error;
- return count;
-}
+ error = decode_inline_filename3(xdr, &entry->name, &entry->len);
+ if (unlikely(error))
+ return error;
-/*
- * Decode WRITE response
- */
-static int
-nfs3_xdr_writeres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res)
-{
- int status;
+ entry->prev_cookie = entry->cookie;
+ error = decode_cookie3(xdr, &entry->cookie);
+ if (unlikely(error))
+ return error;
- status = ntohl(*p++);
- p = xdr_decode_wcc_data(p, res->fattr);
+ entry->d_type = DT_UNKNOWN;
- if (status != 0)
- return nfs_stat_to_errno(status);
+ if (plus) {
+ entry->fattr->valid = 0;
+ error = decode_post_op_attr(xdr, entry->fattr);
+ if (unlikely(error))
+ return error;
+ if (entry->fattr->valid & NFS_ATTR_FATTR_V3)
+ entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
- res->count = ntohl(*p++);
- res->verf->committed = (enum nfs3_stable_how)ntohl(*p++);
- res->verf->verifier[0] = *p++;
- res->verf->verifier[1] = *p++;
+ /* In fact, a post_op_fh3: */
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ if (*p != xdr_zero) {
+ error = decode_nfs_fh3(xdr, entry->fh);
+ if (unlikely(error)) {
+ if (error == -E2BIG)
+ goto out_truncated;
+ return error;
+ }
+ } else
+ zero_nfs_fh3(entry->fh);
+ }
- return res->count;
-}
+ return 0;
-/*
- * Decode a CREATE response
- */
-static int
-nfs3_xdr_createres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res)
-{
- int status;
-
- status = ntohl(*p++);
- if (status == 0) {
- if (*p++) {
- if (!(p = xdr_decode_fhandle(p, res->fh)))
- return -errno_NFSERR_IO;
- p = xdr_decode_post_op_attr(p, res->fattr);
- } else {
- memset(res->fh, 0, sizeof(*res->fh));
- /* Do decode post_op_attr but set it to NULL */
- p = xdr_decode_post_op_attr(p, res->fattr);
- res->fattr->valid = 0;
- }
- } else {
- status = nfs_stat_to_errno(status);
- }
- p = xdr_decode_wcc_data(p, res->dir_attr);
- return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EAGAIN;
+out_truncated:
+ dprintk("NFS: directory entry contains invalid file handle\n");
+ *entry = old;
+ return -EAGAIN;
}
/*
- * Decode RENAME reply
+ * 3.3.16 READDIR3res
+ *
+ * struct dirlist3 {
+ * entry3 *entries;
+ * bool eof;
+ * };
+ *
+ * struct READDIR3resok {
+ * post_op_attr dir_attributes;
+ * cookieverf3 cookieverf;
+ * dirlist3 reply;
+ * };
+ *
+ * struct READDIR3resfail {
+ * post_op_attr dir_attributes;
+ * };
+ *
+ * union READDIR3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * READDIR3resok resok;
+ * default:
+ * READDIR3resfail resfail;
+ * };
+ *
+ * Read the directory contents into the page cache, but otherwise
+ * don't touch them. The actual decoding is done by nfs3_decode_entry()
+ * during subsequent nfs_readdir() calls.
*/
-static int
-nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs_renameres *res)
+static int decode_dirlist3(struct xdr_stream *xdr)
{
- int status;
+ u32 recvd, pglen;
+ size_t hdrlen;
- if ((status = ntohl(*p++)) != 0)
- status = nfs_stat_to_errno(status);
- p = xdr_decode_wcc_data(p, res->old_fattr);
- p = xdr_decode_wcc_data(p, res->new_fattr);
- return status;
+ pglen = xdr->buf->page_len;
+ hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+ recvd = xdr->buf->len - hdrlen;
+ if (unlikely(pglen > recvd))
+ goto out_cheating;
+out:
+ xdr_read_pages(xdr, pglen);
+ return pglen;
+out_cheating:
+ dprintk("NFS: server cheating in readdir result: "
+ "pglen %u > recvd %u\n", pglen, recvd);
+ pglen = recvd;
+ goto out;
}
-/*
- * Decode LINK reply
- */
-static int
-nfs3_xdr_linkres(struct rpc_rqst *req, __be32 *p, struct nfs3_linkres *res)
+static int decode_readdir3resok(struct xdr_stream *xdr,
+ struct nfs3_readdirres *result)
{
- int status;
+ int error;
+
+ error = decode_post_op_attr(xdr, result->dir_attr);
+ if (unlikely(error))
+ goto out;
+ /* XXX: do we need to check if result->verf != NULL ? */
+ error = decode_cookieverf3(xdr, result->verf);
+ if (unlikely(error))
+ goto out;
+ error = decode_dirlist3(xdr);
+out:
+ return error;
+}
- if ((status = ntohl(*p++)) != 0)
- status = nfs_stat_to_errno(status);
- p = xdr_decode_post_op_attr(p, res->fattr);
- p = xdr_decode_wcc_data(p, res->dir_attr);
- return status;
+static int nfs3_xdr_dec_readdir3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs3_readdirres *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_default;
+ error = decode_readdir3resok(xdr, result);
+out:
+ return error;
+out_default:
+ error = decode_post_op_attr(xdr, result->dir_attr);
+ if (unlikely(error))
+ goto out;
+ return nfs_stat_to_errno(status);
}
/*
- * Decode FSSTAT reply
+ * 3.3.18 FSSTAT3res
+ *
+ * struct FSSTAT3resok {
+ * post_op_attr obj_attributes;
+ * size3 tbytes;
+ * size3 fbytes;
+ * size3 abytes;
+ * size3 tfiles;
+ * size3 ffiles;
+ * size3 afiles;
+ * uint32 invarsec;
+ * };
+ *
+ * struct FSSTAT3resfail {
+ * post_op_attr obj_attributes;
+ * };
+ *
+ * union FSSTAT3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * FSSTAT3resok resok;
+ * default:
+ * FSSTAT3resfail resfail;
+ * };
*/
-static int
-nfs3_xdr_fsstatres(struct rpc_rqst *req, __be32 *p, struct nfs_fsstat *res)
+static int decode_fsstat3resok(struct xdr_stream *xdr,
+ struct nfs_fsstat *result)
{
- int status;
-
- status = ntohl(*p++);
-
- p = xdr_decode_post_op_attr(p, res->fattr);
- if (status != 0)
- return nfs_stat_to_errno(status);
-
- p = xdr_decode_hyper(p, &res->tbytes);
- p = xdr_decode_hyper(p, &res->fbytes);
- p = xdr_decode_hyper(p, &res->abytes);
- p = xdr_decode_hyper(p, &res->tfiles);
- p = xdr_decode_hyper(p, &res->ffiles);
- p = xdr_decode_hyper(p, &res->afiles);
+ __be32 *p;
+ p = xdr_inline_decode(xdr, 8 * 6 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ p = xdr_decode_size3(p, &result->tbytes);
+ p = xdr_decode_size3(p, &result->fbytes);
+ p = xdr_decode_size3(p, &result->abytes);
+ p = xdr_decode_size3(p, &result->tfiles);
+ p = xdr_decode_size3(p, &result->ffiles);
+ xdr_decode_size3(p, &result->afiles);
/* ignore invarsec */
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_fsstat *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+ error = decode_fsstat3resok(xdr, result);
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
}
/*
- * Decode FSINFO reply
+ * 3.3.19 FSINFO3res
+ *
+ * struct FSINFO3resok {
+ * post_op_attr obj_attributes;
+ * uint32 rtmax;
+ * uint32 rtpref;
+ * uint32 rtmult;
+ * uint32 wtmax;
+ * uint32 wtpref;
+ * uint32 wtmult;
+ * uint32 dtpref;
+ * size3 maxfilesize;
+ * nfstime3 time_delta;
+ * uint32 properties;
+ * };
+ *
+ * struct FSINFO3resfail {
+ * post_op_attr obj_attributes;
+ * };
+ *
+ * union FSINFO3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * FSINFO3resok resok;
+ * default:
+ * FSINFO3resfail resfail;
+ * };
*/
-static int
-nfs3_xdr_fsinfores(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *res)
+static int decode_fsinfo3resok(struct xdr_stream *xdr,
+ struct nfs_fsinfo *result)
{
- int status;
-
- status = ntohl(*p++);
-
- p = xdr_decode_post_op_attr(p, res->fattr);
- if (status != 0)
- return nfs_stat_to_errno(status);
+ __be32 *p;
- res->rtmax = ntohl(*p++);
- res->rtpref = ntohl(*p++);
- res->rtmult = ntohl(*p++);
- res->wtmax = ntohl(*p++);
- res->wtpref = ntohl(*p++);
- res->wtmult = ntohl(*p++);
- res->dtpref = ntohl(*p++);
- p = xdr_decode_hyper(p, &res->maxfilesize);
- p = xdr_decode_time3(p, &res->time_delta);
+ p = xdr_inline_decode(xdr, 4 * 7 + 8 + 8 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ result->rtmax = be32_to_cpup(p++);
+ result->rtpref = be32_to_cpup(p++);
+ result->rtmult = be32_to_cpup(p++);
+ result->wtmax = be32_to_cpup(p++);
+ result->wtpref = be32_to_cpup(p++);
+ result->wtmult = be32_to_cpup(p++);
+ result->dtpref = be32_to_cpup(p++);
+ p = xdr_decode_size3(p, &result->maxfilesize);
+ xdr_decode_nfstime3(p, &result->time_delta);
/* ignore properties */
- res->lease_time = 0;
+ result->lease_time = 0;
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_fsinfo *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+ error = decode_fsinfo3resok(xdr, result);
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
}
/*
- * Decode PATHCONF reply
+ * 3.3.20 PATHCONF3res
+ *
+ * struct PATHCONF3resok {
+ * post_op_attr obj_attributes;
+ * uint32 linkmax;
+ * uint32 name_max;
+ * bool no_trunc;
+ * bool chown_restricted;
+ * bool case_insensitive;
+ * bool case_preserving;
+ * };
+ *
+ * struct PATHCONF3resfail {
+ * post_op_attr obj_attributes;
+ * };
+ *
+ * union PATHCONF3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * PATHCONF3resok resok;
+ * default:
+ * PATHCONF3resfail resfail;
+ * };
*/
-static int
-nfs3_xdr_pathconfres(struct rpc_rqst *req, __be32 *p, struct nfs_pathconf *res)
+static int decode_pathconf3resok(struct xdr_stream *xdr,
+ struct nfs_pathconf *result)
{
- int status;
-
- status = ntohl(*p++);
-
- p = xdr_decode_post_op_attr(p, res->fattr);
- if (status != 0)
- return nfs_stat_to_errno(status);
- res->max_link = ntohl(*p++);
- res->max_namelen = ntohl(*p++);
+ __be32 *p;
+ p = xdr_inline_decode(xdr, 4 * 6);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ result->max_link = be32_to_cpup(p++);
+ result->max_namelen = be32_to_cpup(p);
/* ignore remaining fields */
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_pathconf *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+ error = decode_pathconf3resok(xdr, result);
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
}
/*
- * Decode COMMIT reply
+ * 3.3.21 COMMIT3res
+ *
+ * struct COMMIT3resok {
+ * wcc_data file_wcc;
+ * writeverf3 verf;
+ * };
+ *
+ * struct COMMIT3resfail {
+ * wcc_data file_wcc;
+ * };
+ *
+ * union COMMIT3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * COMMIT3resok resok;
+ * default:
+ * COMMIT3resfail resfail;
+ * };
*/
-static int
-nfs3_xdr_commitres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res)
+static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_writeres *result)
{
- int status;
-
- status = ntohl(*p++);
- p = xdr_decode_wcc_data(p, res->fattr);
- if (status != 0)
- return nfs_stat_to_errno(status);
-
- res->verf->verifier[0] = *p++;
- res->verf->verifier[1] = *p++;
- return 0;
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_wcc_data(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+ error = decode_writeverf3(xdr, result->verf->verifier);
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
}
#ifdef CONFIG_NFS_V3_ACL
-/*
- * Decode GETACL reply
- */
-static int
-nfs3_xdr_getaclres(struct rpc_rqst *req, __be32 *p,
- struct nfs3_getaclres *res)
+
+static inline int decode_getacl3resok(struct xdr_stream *xdr,
+ struct nfs3_getaclres *result)
{
- struct xdr_buf *buf = &req->rq_rcv_buf;
- int status = ntohl(*p++);
struct posix_acl **acl;
unsigned int *aclcnt;
- int err, base;
-
- if (status != 0)
- return nfs_stat_to_errno(status);
- p = xdr_decode_post_op_attr(p, res->fattr);
- res->mask = ntohl(*p++);
- if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
- return -EINVAL;
- base = (char *)p - (char *)req->rq_rcv_buf.head->iov_base;
-
- acl = (res->mask & NFS_ACL) ? &res->acl_access : NULL;
- aclcnt = (res->mask & NFS_ACLCNT) ? &res->acl_access_count : NULL;
- err = nfsacl_decode(buf, base, aclcnt, acl);
-
- acl = (res->mask & NFS_DFACL) ? &res->acl_default : NULL;
- aclcnt = (res->mask & NFS_DFACLCNT) ? &res->acl_default_count : NULL;
- if (err > 0)
- err = nfsacl_decode(buf, base + err, aclcnt, acl);
- return (err > 0) ? 0 : err;
+ size_t hdrlen;
+ int error;
+
+ error = decode_post_op_attr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ error = decode_uint32(xdr, &result->mask);
+ if (unlikely(error))
+ goto out;
+ error = -EINVAL;
+ if (result->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+ goto out;
+
+ hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+
+ acl = NULL;
+ if (result->mask & NFS_ACL)
+ acl = &result->acl_access;
+ aclcnt = NULL;
+ if (result->mask & NFS_ACLCNT)
+ aclcnt = &result->acl_access_count;
+ error = nfsacl_decode(xdr->buf, hdrlen, aclcnt, acl);
+ if (unlikely(error <= 0))
+ goto out;
+
+ acl = NULL;
+ if (result->mask & NFS_DFACL)
+ acl = &result->acl_default;
+ aclcnt = NULL;
+ if (result->mask & NFS_DFACLCNT)
+ aclcnt = &result->acl_default_count;
+ error = nfsacl_decode(xdr->buf, hdrlen + error, aclcnt, acl);
+ if (unlikely(error <= 0))
+ return error;
+ error = 0;
+out:
+ return error;
}
-/*
- * Decode setacl reply.
- */
-static int
-nfs3_xdr_setaclres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
+static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs3_getaclres *result)
{
- int status = ntohl(*p++);
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_default;
+ error = decode_getacl3resok(xdr, result);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
+}
- if (status)
- return nfs_stat_to_errno(status);
- xdr_decode_post_op_attr(p, fattr);
- return 0;
+static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_fattr *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_default;
+ error = decode_post_op_attr(xdr, result);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
}
+
#endif /* CONFIG_NFS_V3_ACL */
#define PROC(proc, argtype, restype, timer) \
[NFS3PROC_##proc] = { \
.p_proc = NFS3PROC_##proc, \
- .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \
- .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \
- .p_arglen = NFS3_##argtype##_sz, \
- .p_replen = NFS3_##restype##_sz, \
+ .p_encode = (kxdreproc_t)nfs3_xdr_enc_##argtype##3args, \
+ .p_decode = (kxdrdproc_t)nfs3_xdr_dec_##restype##3res, \
+ .p_arglen = NFS3_##argtype##args_sz, \
+ .p_replen = NFS3_##restype##res_sz, \
.p_timer = timer, \
.p_statidx = NFS3PROC_##proc, \
.p_name = #proc, \
}
struct rpc_procinfo nfs3_procedures[] = {
- PROC(GETATTR, fhandle, attrstat, 1),
- PROC(SETATTR, sattrargs, wccstat, 0),
- PROC(LOOKUP, diropargs, lookupres, 2),
- PROC(ACCESS, accessargs, accessres, 1),
- PROC(READLINK, readlinkargs, readlinkres, 3),
- PROC(READ, readargs, readres, 3),
- PROC(WRITE, writeargs, writeres, 4),
- PROC(CREATE, createargs, createres, 0),
- PROC(MKDIR, mkdirargs, createres, 0),
- PROC(SYMLINK, symlinkargs, createres, 0),
- PROC(MKNOD, mknodargs, createres, 0),
- PROC(REMOVE, removeargs, removeres, 0),
- PROC(RMDIR, diropargs, wccstat, 0),
- PROC(RENAME, renameargs, renameres, 0),
- PROC(LINK, linkargs, linkres, 0),
- PROC(READDIR, readdirargs, readdirres, 3),
- PROC(READDIRPLUS, readdirargs, readdirres, 3),
- PROC(FSSTAT, fhandle, fsstatres, 0),
- PROC(FSINFO, fhandle, fsinfores, 0),
- PROC(PATHCONF, fhandle, pathconfres, 0),
- PROC(COMMIT, commitargs, commitres, 5),
+ PROC(GETATTR, getattr, getattr, 1),
+ PROC(SETATTR, setattr, setattr, 0),
+ PROC(LOOKUP, lookup, lookup, 2),
+ PROC(ACCESS, access, access, 1),
+ PROC(READLINK, readlink, readlink, 3),
+ PROC(READ, read, read, 3),
+ PROC(WRITE, write, write, 4),
+ PROC(CREATE, create, create, 0),
+ PROC(MKDIR, mkdir, create, 0),
+ PROC(SYMLINK, symlink, create, 0),
+ PROC(MKNOD, mknod, create, 0),
+ PROC(REMOVE, remove, remove, 0),
+ PROC(RMDIR, lookup, setattr, 0),
+ PROC(RENAME, rename, rename, 0),
+ PROC(LINK, link, link, 0),
+ PROC(READDIR, readdir, readdir, 3),
+ PROC(READDIRPLUS, readdirplus, readdir, 3),
+ PROC(FSSTAT, getattr, fsstat, 0),
+ PROC(FSINFO, getattr, fsinfo, 0),
+ PROC(PATHCONF, getattr, pathconf, 0),
+ PROC(COMMIT, commit, commit, 5),
};
struct rpc_version nfs_version3 = {
@@ -1183,8 +2468,8 @@ struct rpc_version nfs_version3 = {
static struct rpc_procinfo nfs3_acl_procedures[] = {
[ACLPROC3_GETACL] = {
.p_proc = ACLPROC3_GETACL,
- .p_encode = (kxdrproc_t) nfs3_xdr_getaclargs,
- .p_decode = (kxdrproc_t) nfs3_xdr_getaclres,
+ .p_encode = (kxdreproc_t)nfs3_xdr_enc_getacl3args,
+ .p_decode = (kxdrdproc_t)nfs3_xdr_dec_getacl3res,
.p_arglen = ACL3_getaclargs_sz,
.p_replen = ACL3_getaclres_sz,
.p_timer = 1,
@@ -1192,8 +2477,8 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
},
[ACLPROC3_SETACL] = {
.p_proc = ACLPROC3_SETACL,
- .p_encode = (kxdrproc_t) nfs3_xdr_setaclargs,
- .p_decode = (kxdrproc_t) nfs3_xdr_setaclres,
+ .p_encode = (kxdreproc_t)nfs3_xdr_enc_setacl3args,
+ .p_decode = (kxdrdproc_t)nfs3_xdr_dec_setacl3res,
.p_arglen = ACL3_setaclargs_sz,
.p_replen = ACL3_setaclres_sz,
.p_timer = 0,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 9fa4963..7a74740 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -44,6 +44,7 @@ enum nfs4_client_state {
NFS4CLNT_RECLAIM_REBOOT,
NFS4CLNT_RECLAIM_NOGRACE,
NFS4CLNT_DELEGRETURN,
+ NFS4CLNT_LAYOUTRECALL,
NFS4CLNT_SESSION_RESET,
NFS4CLNT_RECALL_SLOT,
};
@@ -109,7 +110,7 @@ struct nfs_unique_id {
struct nfs4_state_owner {
struct nfs_unique_id so_owner_id;
struct nfs_server *so_server;
- struct rb_node so_client_node;
+ struct rb_node so_server_node;
struct rpc_cred *so_cred; /* Associated cred */
@@ -227,12 +228,6 @@ struct nfs4_state_maintenance_ops {
extern const struct dentry_operations nfs4_dentry_operations;
extern const struct inode_operations nfs4_dir_inode_operations;
-/* inode.c */
-extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t);
-extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int);
-extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
-
-
/* nfs4proc.c */
extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
@@ -241,11 +236,12 @@ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
-extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait);
+extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
struct nfs4_fs_locations *fs_locations, struct page *page);
extern void nfs4_release_lockowner(const struct nfs4_lock_state *);
+extern const struct xattr_handler *nfs4_xattr_handlers[];
#if defined(CONFIG_NFS_V4_1)
static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
@@ -331,7 +327,6 @@ extern void nfs_free_seqid(struct nfs_seqid *seqid);
extern const nfs4_stateid zero_stateid;
/* nfs4xdr.c */
-extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
extern struct rpc_procinfo nfs4_procedures[];
struct nfs4_mount_data;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 2e92f0d..23f930c 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -82,7 +82,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
{
struct nfs4_file_layout_dsaddr *dsaddr;
int status = -EINVAL;
- struct nfs_server *nfss = NFS_SERVER(lo->inode);
+ struct nfs_server *nfss = NFS_SERVER(lo->plh_inode);
dprintk("--> %s\n", __func__);
@@ -101,7 +101,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
/* find and reference the deviceid */
dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id);
if (dsaddr == NULL) {
- dsaddr = get_device_info(lo->inode, id);
+ dsaddr = get_device_info(lo->plh_inode, id);
if (dsaddr == NULL)
goto out;
}
@@ -243,7 +243,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
static void
filelayout_free_lseg(struct pnfs_layout_segment *lseg)
{
- struct nfs_server *nfss = NFS_SERVER(lseg->layout->inode);
+ struct nfs_server *nfss = NFS_SERVER(lseg->pls_layout->plh_inode);
struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
dprintk("--> %s\n", __func__);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 32c8758..9d992b0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -49,6 +49,7 @@
#include <linux/mount.h>
#include <linux/module.h>
#include <linux/sunrpc/bc_xprt.h>
+#include <linux/xattr.h>
#include "nfs4_fs.h"
#include "delegation.h"
@@ -355,9 +356,9 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot)
}
/*
- * Signal state manager thread if session is drained
+ * Signal state manager thread if session fore channel is drained
*/
-static void nfs41_check_drain_session_complete(struct nfs4_session *ses)
+static void nfs4_check_drain_fc_complete(struct nfs4_session *ses)
{
struct rpc_task *task;
@@ -371,8 +372,20 @@ static void nfs41_check_drain_session_complete(struct nfs4_session *ses)
if (ses->fc_slot_table.highest_used_slotid != -1)
return;
- dprintk("%s COMPLETE: Session Drained\n", __func__);
- complete(&ses->complete);
+ dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__);
+ complete(&ses->fc_slot_table.complete);
+}
+
+/*
+ * Signal state manager thread if session back channel is drained
+ */
+void nfs4_check_drain_bc_complete(struct nfs4_session *ses)
+{
+ if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) ||
+ ses->bc_slot_table.highest_used_slotid != -1)
+ return;
+ dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__);
+ complete(&ses->bc_slot_table.complete);
}
static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
@@ -389,7 +402,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
spin_lock(&tbl->slot_tbl_lock);
nfs4_free_slot(tbl, res->sr_slot);
- nfs41_check_drain_session_complete(res->sr_session);
+ nfs4_check_drain_fc_complete(res->sr_session);
spin_unlock(&tbl->slot_tbl_lock);
res->sr_slot = NULL;
}
@@ -429,7 +442,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
* returned NFS4ERR_DELAY as per Section 2.10.6.2
* of RFC5661.
*/
- dprintk("%s: slot=%ld seq=%d: Operation in progress\n",
+ dprintk("%s: slot=%td seq=%d: Operation in progress\n",
__func__,
res->sr_slot - res->sr_session->fc_slot_table.slots,
res->sr_slot->seq_nr);
@@ -573,7 +586,7 @@ int nfs4_setup_sequence(const struct nfs_server *server,
goto out;
}
- dprintk("--> %s clp %p session %p sr_slot %ld\n",
+ dprintk("--> %s clp %p session %p sr_slot %td\n",
__func__, session->clp, session, res->sr_slot ?
res->sr_slot - session->fc_slot_table.slots : -1);
@@ -1826,6 +1839,8 @@ struct nfs4_closedata {
struct nfs_closeres res;
struct nfs_fattr fattr;
unsigned long timestamp;
+ bool roc;
+ u32 roc_barrier;
};
static void nfs4_free_closedata(void *data)
@@ -1833,6 +1848,8 @@ static void nfs4_free_closedata(void *data)
struct nfs4_closedata *calldata = data;
struct nfs4_state_owner *sp = calldata->state->owner;
+ if (calldata->roc)
+ pnfs_roc_release(calldata->state->inode);
nfs4_put_open_state(calldata->state);
nfs_free_seqid(calldata->arg.seqid);
nfs4_put_state_owner(sp);
@@ -1865,6 +1882,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
*/
switch (task->tk_status) {
case 0:
+ if (calldata->roc)
+ pnfs_roc_set_barrier(state->inode,
+ calldata->roc_barrier);
nfs_set_open_stateid(state, &calldata->res.stateid, 0);
renew_lease(server, calldata->timestamp);
nfs4_close_clear_stateid_flags(state,
@@ -1917,8 +1937,15 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
return;
}
- if (calldata->arg.fmode == 0)
+ if (calldata->arg.fmode == 0) {
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
+ if (calldata->roc &&
+ pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) {
+ rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq,
+ task, NULL);
+ return;
+ }
+ }
nfs_fattr_init(calldata->res.fattr);
calldata->timestamp = jiffies;
@@ -1946,7 +1973,7 @@ static const struct rpc_call_ops nfs4_close_ops = {
*
* NOTE: Caller must be holding the sp->so_owner semaphore!
*/
-int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait)
+int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc)
{
struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs4_closedata *calldata;
@@ -1981,11 +2008,12 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
calldata->res.fattr = &calldata->fattr;
calldata->res.seqid = calldata->arg.seqid;
calldata->res.server = server;
+ calldata->roc = roc;
path_get(path);
calldata->path = *path;
- msg.rpc_argp = &calldata->arg,
- msg.rpc_resp = &calldata->res,
+ msg.rpc_argp = &calldata->arg;
+ msg.rpc_resp = &calldata->res;
task_setup_data.callback_data = calldata;
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
@@ -1998,6 +2026,8 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
out_free_calldata:
kfree(calldata);
out:
+ if (roc)
+ pnfs_roc_release(state->inode);
nfs4_put_open_state(state);
nfs4_put_state_owner(sp);
return status;
@@ -2486,6 +2516,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
path = &ctx->path;
fmode = ctx->mode;
}
+ sattr->ia_mode &= ~current_umask();
state = nfs4_do_open(dir, path, fmode, flags, sattr, cred);
d_drop(dentry);
if (IS_ERR(state)) {
@@ -2816,6 +2847,8 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
{
struct nfs4_exception exception = { };
int err;
+
+ sattr->ia_mode &= ~current_umask();
do {
err = nfs4_handle_exception(NFS_SERVER(dir),
_nfs4_proc_mkdir(dir, dentry, sattr),
@@ -2852,8 +2885,10 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
res.pgbase = args.pgbase;
status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0);
- if (status == 0)
+ if (status >= 0) {
memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
+ status += args.pgbase;
+ }
nfs_invalidate_atime(dir);
@@ -2914,6 +2949,8 @@ static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
{
struct nfs4_exception exception = { };
int err;
+
+ sattr->ia_mode &= ~current_umask();
do {
err = nfs4_handle_exception(NFS_SERVER(dir),
_nfs4_proc_mknod(dir, dentry, sattr, rdev),
@@ -3359,6 +3396,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
ret = nfs_revalidate_inode(server, inode);
if (ret < 0)
return ret;
+ if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
+ nfs_zap_acl_cache(inode);
ret = nfs4_read_cached_acl(inode, buf, buflen);
if (ret != -ENOENT)
return ret;
@@ -3387,6 +3426,13 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
nfs_inode_return_delegation(inode);
buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
+ /*
+ * Acl update can result in inode attribute update.
+ * so mark the attribute cache invalid.
+ */
+ spin_lock(&inode->i_lock);
+ NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR;
+ spin_unlock(&inode->i_lock);
nfs_access_zap_cache(inode);
nfs_zap_acl_cache(inode);
return ret;
@@ -3467,6 +3513,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
struct nfs4_setclientid setclientid = {
.sc_verifier = &sc_verifier,
.sc_prog = program,
+ .sc_cb_ident = clp->cl_cb_ident,
};
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
@@ -3506,7 +3553,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
if (signalled())
break;
if (loop++ & 1)
- ssleep(clp->cl_lease_time + 1);
+ ssleep(clp->cl_lease_time / HZ + 1);
else
if (++clp->cl_id_uniquifier == 0)
break;
@@ -3652,8 +3699,8 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
data->rpc_status = 0;
task_setup_data.callback_data = data;
- msg.rpc_argp = &data->args,
- msg.rpc_resp = &data->res,
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -3732,6 +3779,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
goto out;
lsp = request->fl_u.nfs4_fl.owner;
arg.lock_owner.id = lsp->ls_id.id;
+ arg.lock_owner.s_dev = server->s_dev;
status = nfs4_call_sync(server, &msg, &arg, &res, 1);
switch (status) {
case 0:
@@ -3897,8 +3945,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
return ERR_PTR(-ENOMEM);
}
- msg.rpc_argp = &data->arg,
- msg.rpc_resp = &data->res,
+ msg.rpc_argp = &data->arg;
+ msg.rpc_resp = &data->res;
task_setup_data.callback_data = data;
return rpc_run_task(&task_setup_data);
}
@@ -3977,6 +4025,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
p->arg.lock_stateid = &lsp->ls_stateid;
p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
p->arg.lock_owner.id = lsp->ls_id.id;
+ p->arg.lock_owner.s_dev = server->s_dev;
p->res.lock_seqid = p->arg.lock_seqid;
p->lsp = lsp;
p->server = server;
@@ -4134,8 +4183,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
data->arg.reclaim = NFS_LOCK_RECLAIM;
task_setup_data.callback_ops = &nfs4_recover_lock_ops;
}
- msg.rpc_argp = &data->arg,
- msg.rpc_resp = &data->res,
+ msg.rpc_argp = &data->arg;
+ msg.rpc_resp = &data->res;
task_setup_data.callback_data = data;
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
@@ -4381,48 +4430,43 @@ void nfs4_release_lockowner(const struct nfs4_lock_state *lsp)
return;
args->lock_owner.clientid = server->nfs_client->cl_clientid;
args->lock_owner.id = lsp->ls_id.id;
+ args->lock_owner.s_dev = server->s_dev;
msg.rpc_argp = args;
rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args);
}
#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
-int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
- size_t buflen, int flags)
+static int nfs4_xattr_set_nfs4_acl(struct dentry *dentry, const char *key,
+ const void *buf, size_t buflen,
+ int flags, int type)
{
- struct inode *inode = dentry->d_inode;
-
- if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
- return -EOPNOTSUPP;
+ if (strcmp(key, "") != 0)
+ return -EINVAL;
- return nfs4_proc_set_acl(inode, buf, buflen);
+ return nfs4_proc_set_acl(dentry->d_inode, buf, buflen);
}
-/* The getxattr man page suggests returning -ENODATA for unknown attributes,
- * and that's what we'll do for e.g. user attributes that haven't been set.
- * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported
- * attributes in kernel-managed attribute namespaces. */
-ssize_t nfs4_getxattr(struct dentry *dentry, const char *key, void *buf,
- size_t buflen)
+static int nfs4_xattr_get_nfs4_acl(struct dentry *dentry, const char *key,
+ void *buf, size_t buflen, int type)
{
- struct inode *inode = dentry->d_inode;
-
- if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
- return -EOPNOTSUPP;
+ if (strcmp(key, "") != 0)
+ return -EINVAL;
- return nfs4_proc_get_acl(inode, buf, buflen);
+ return nfs4_proc_get_acl(dentry->d_inode, buf, buflen);
}
-ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
+static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list,
+ size_t list_len, const char *name,
+ size_t name_len, int type)
{
- size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1;
+ size_t len = sizeof(XATTR_NAME_NFSV4_ACL);
if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode)))
return 0;
- if (buf && buflen < len)
- return -ERANGE;
- if (buf)
- memcpy(buf, XATTR_NAME_NFSV4_ACL, len);
+
+ if (list && len <= list_len)
+ memcpy(list, XATTR_NAME_NFSV4_ACL, len);
return len;
}
@@ -4475,6 +4519,25 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
#ifdef CONFIG_NFS_V4_1
/*
+ * Check the exchange flags returned by the server for invalid flags, having
+ * both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or
+ * DS flags set.
+ */
+static int nfs4_check_cl_exchange_flags(u32 flags)
+{
+ if (flags & ~EXCHGID4_FLAG_MASK_R)
+ goto out_inval;
+ if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) &&
+ (flags & EXCHGID4_FLAG_USE_NON_PNFS))
+ goto out_inval;
+ if (!(flags & (EXCHGID4_FLAG_MASK_PNFS)))
+ goto out_inval;
+ return NFS_OK;
+out_inval:
+ return -NFS4ERR_INVAL;
+}
+
+/*
* nfs4_proc_exchange_id()
*
* Since the clientid has expired, all compounds using sessions
@@ -4487,7 +4550,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
nfs4_verifier verifier;
struct nfs41_exchange_id_args args = {
.client = clp,
- .flags = clp->cl_exchange_flags,
+ .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER,
};
struct nfs41_exchange_id_res res = {
.client = clp,
@@ -4504,9 +4567,6 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
dprintk("--> %s\n", __func__);
BUG_ON(clp == NULL);
- /* Remove server-only flags */
- args.flags &= ~EXCHGID4_FLAG_CONFIRMED_R;
-
p = (u32 *)verifier.data;
*p++ = htonl((u32)clp->cl_boot_time.tv_sec);
*p = htonl((u32)clp->cl_boot_time.tv_nsec);
@@ -4532,6 +4592,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
break;
}
+ status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
dprintk("<-- %s status= %d\n", __func__, status);
return status;
}
@@ -4765,17 +4826,17 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
if (!session)
return NULL;
- init_completion(&session->complete);
-
tbl = &session->fc_slot_table;
tbl->highest_used_slotid = -1;
spin_lock_init(&tbl->slot_tbl_lock);
rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
+ init_completion(&tbl->complete);
tbl = &session->bc_slot_table;
tbl->highest_used_slotid = -1;
spin_lock_init(&tbl->slot_tbl_lock);
rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
+ init_completion(&tbl->complete);
session->session_state = 1<<NFS4_SESSION_INITING;
@@ -5269,13 +5330,23 @@ static void
nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
{
struct nfs4_layoutget *lgp = calldata;
- struct inode *ino = lgp->args.inode;
- struct nfs_server *server = NFS_SERVER(ino);
+ struct nfs_server *server = NFS_SERVER(lgp->args.inode);
dprintk("--> %s\n", __func__);
+ /* Note the is a race here, where a CB_LAYOUTRECALL can come in
+ * right now covering the LAYOUTGET we are about to send.
+ * However, that is not so catastrophic, and there seems
+ * to be no way to prevent it completely.
+ */
if (nfs4_setup_sequence(server, &lgp->args.seq_args,
&lgp->res.seq_res, 0, task))
return;
+ if (pnfs_choose_layoutget_stateid(&lgp->args.stateid,
+ NFS_I(lgp->args.inode)->layout,
+ lgp->args.ctx->state)) {
+ rpc_exit(task, NFS4_OK);
+ return;
+ }
rpc_call_start(task);
}
@@ -5302,7 +5373,6 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
return;
}
}
- lgp->status = task->tk_status;
dprintk("<-- %s\n", __func__);
}
@@ -5311,7 +5381,6 @@ static void nfs4_layoutget_release(void *calldata)
struct nfs4_layoutget *lgp = calldata;
dprintk("--> %s\n", __func__);
- put_layout_hdr(lgp->args.inode);
if (lgp->res.layout.buf != NULL)
free_page((unsigned long) lgp->res.layout.buf);
put_nfs_open_context(lgp->args.ctx);
@@ -5356,13 +5425,10 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
if (IS_ERR(task))
return PTR_ERR(task);
status = nfs4_wait_for_completion_rpc_task(task);
- if (status != 0)
- goto out;
- status = lgp->status;
- if (status != 0)
- goto out;
- status = pnfs_layout_process(lgp);
-out:
+ if (status == 0)
+ status = task->tk_status;
+ if (status == 0)
+ status = pnfs_layout_process(lgp);
rpc_put_task(task);
dprintk("<-- %s status=%d\n", __func__, status);
return status;
@@ -5493,9 +5559,10 @@ static const struct inode_operations nfs4_file_inode_operations = {
.permission = nfs_permission,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
- .getxattr = nfs4_getxattr,
- .setxattr = nfs4_setxattr,
- .listxattr = nfs4_listxattr,
+ .getxattr = generic_getxattr,
+ .setxattr = generic_setxattr,
+ .listxattr = generic_listxattr,
+ .removexattr = generic_removexattr,
};
const struct nfs_rpc_ops nfs_v4_clientops = {
@@ -5540,6 +5607,18 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.open_context = nfs4_atomic_open,
};
+static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
+ .prefix = XATTR_NAME_NFSV4_ACL,
+ .list = nfs4_xattr_list_nfs4_acl,
+ .get = nfs4_xattr_get_nfs4_acl,
+ .set = nfs4_xattr_set_nfs4_acl,
+};
+
+const struct xattr_handler *nfs4_xattr_handlers[] = {
+ &nfs4_xattr_nfs4_acl_handler,
+ NULL
+};
+
/*
* Local variables:
* c-basic-offset: 8
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 72b6c58..402143d 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -63,9 +63,14 @@ nfs4_renew_state(struct work_struct *work)
ops = clp->cl_mvops->state_renewal_ops;
dprintk("%s: start\n", __func__);
- /* Are there any active superblocks? */
- if (list_empty(&clp->cl_superblocks))
+
+ rcu_read_lock();
+ if (list_empty(&clp->cl_superblocks)) {
+ rcu_read_unlock();
goto out;
+ }
+ rcu_read_unlock();
+
spin_lock(&clp->cl_lock);
lease = clp->cl_lease_time;
last = clp->cl_last_renewal;
@@ -75,7 +80,7 @@ nfs4_renew_state(struct work_struct *work)
cred = ops->get_state_renewal_cred_locked(clp);
spin_unlock(&clp->cl_lock);
if (cred == NULL) {
- if (list_empty(&clp->cl_delegations)) {
+ if (!nfs_delegations_present(clp)) {
set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
goto out;
}
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index f575a31..2336d53 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -105,14 +105,17 @@ static void nfs4_clear_machine_cred(struct nfs_client *clp)
put_rpccred(cred);
}
-struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
+static struct rpc_cred *
+nfs4_get_renew_cred_server_locked(struct nfs_server *server)
{
+ struct rpc_cred *cred = NULL;
struct nfs4_state_owner *sp;
struct rb_node *pos;
- struct rpc_cred *cred = NULL;
- for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
- sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+ for (pos = rb_first(&server->state_owners);
+ pos != NULL;
+ pos = rb_next(pos)) {
+ sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
if (list_empty(&sp->so_states))
continue;
cred = get_rpccred(sp->so_cred);
@@ -121,6 +124,28 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
return cred;
}
+/**
+ * nfs4_get_renew_cred_locked - Acquire credential for a renew operation
+ * @clp: client state handle
+ *
+ * Returns an rpc_cred with reference count bumped, or NULL.
+ * Caller must hold clp->cl_lock.
+ */
+struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
+{
+ struct rpc_cred *cred = NULL;
+ struct nfs_server *server;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ cred = nfs4_get_renew_cred_server_locked(server);
+ if (cred != NULL)
+ break;
+ }
+ rcu_read_unlock();
+ return cred;
+}
+
#if defined(CONFIG_NFS_V4_1)
static int nfs41_setup_state_renewal(struct nfs_client *clp)
@@ -142,6 +167,11 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
return status;
}
+/*
+ * Back channel returns NFS4ERR_DELAY for new requests when
+ * NFS4_SESSION_DRAINING is set so there is no work to be done when draining
+ * is ended.
+ */
static void nfs4_end_drain_session(struct nfs_client *clp)
{
struct nfs4_session *ses = clp->cl_session;
@@ -165,22 +195,32 @@ static void nfs4_end_drain_session(struct nfs_client *clp)
}
}
-static int nfs4_begin_drain_session(struct nfs_client *clp)
+static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl)
{
- struct nfs4_session *ses = clp->cl_session;
- struct nfs4_slot_table *tbl = &ses->fc_slot_table;
-
spin_lock(&tbl->slot_tbl_lock);
- set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
if (tbl->highest_used_slotid != -1) {
- INIT_COMPLETION(ses->complete);
+ INIT_COMPLETION(tbl->complete);
spin_unlock(&tbl->slot_tbl_lock);
- return wait_for_completion_interruptible(&ses->complete);
+ return wait_for_completion_interruptible(&tbl->complete);
}
spin_unlock(&tbl->slot_tbl_lock);
return 0;
}
+static int nfs4_begin_drain_session(struct nfs_client *clp)
+{
+ struct nfs4_session *ses = clp->cl_session;
+ int ret = 0;
+
+ set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
+ /* back channel */
+ ret = nfs4_wait_on_slot_tbl(&ses->bc_slot_table);
+ if (ret)
+ return ret;
+ /* fore channel */
+ return nfs4_wait_on_slot_tbl(&ses->fc_slot_table);
+}
+
int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
{
int status;
@@ -192,6 +232,12 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
status = nfs4_proc_create_session(clp);
if (status != 0)
goto out;
+ status = nfs4_set_callback_sessionid(clp);
+ if (status != 0) {
+ printk(KERN_WARNING "Sessionid not set. No callback service\n");
+ nfs_callback_down(1);
+ status = 0;
+ }
nfs41_setup_state_renewal(clp);
nfs_mark_client_ready(clp, NFS_CS_READY);
out:
@@ -210,28 +256,56 @@ struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp)
#endif /* CONFIG_NFS_V4_1 */
-struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
+static struct rpc_cred *
+nfs4_get_setclientid_cred_server(struct nfs_server *server)
{
+ struct nfs_client *clp = server->nfs_client;
+ struct rpc_cred *cred = NULL;
struct nfs4_state_owner *sp;
struct rb_node *pos;
+
+ spin_lock(&clp->cl_lock);
+ pos = rb_first(&server->state_owners);
+ if (pos != NULL) {
+ sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
+ cred = get_rpccred(sp->so_cred);
+ }
+ spin_unlock(&clp->cl_lock);
+ return cred;
+}
+
+/**
+ * nfs4_get_setclientid_cred - Acquire credential for a setclientid operation
+ * @clp: client state handle
+ *
+ * Returns an rpc_cred with reference count bumped, or NULL.
+ */
+struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
+{
+ struct nfs_server *server;
struct rpc_cred *cred;
spin_lock(&clp->cl_lock);
cred = nfs4_get_machine_cred_locked(clp);
+ spin_unlock(&clp->cl_lock);
if (cred != NULL)
goto out;
- pos = rb_first(&clp->cl_state_owners);
- if (pos != NULL) {
- sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
- cred = get_rpccred(sp->so_cred);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ cred = nfs4_get_setclientid_cred_server(server);
+ if (cred != NULL)
+ break;
}
+ rcu_read_unlock();
+
out:
- spin_unlock(&clp->cl_lock);
return cred;
}
-static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new,
- __u64 minval, int maxbits)
+static void nfs_alloc_unique_id_locked(struct rb_root *root,
+ struct nfs_unique_id *new,
+ __u64 minval, int maxbits)
{
struct rb_node **p, *parent;
struct nfs_unique_id *pos;
@@ -286,16 +360,15 @@ static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
}
static struct nfs4_state_owner *
-nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
+nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
{
- struct nfs_client *clp = server->nfs_client;
- struct rb_node **p = &clp->cl_state_owners.rb_node,
+ struct rb_node **p = &server->state_owners.rb_node,
*parent = NULL;
struct nfs4_state_owner *sp, *res = NULL;
while (*p != NULL) {
parent = *p;
- sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+ sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
if (server < sp->so_server) {
p = &parent->rb_left;
@@ -319,24 +392,17 @@ nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
}
static struct nfs4_state_owner *
-nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
+nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
{
- struct rb_node **p = &clp->cl_state_owners.rb_node,
+ struct nfs_server *server = new->so_server;
+ struct rb_node **p = &server->state_owners.rb_node,
*parent = NULL;
struct nfs4_state_owner *sp;
while (*p != NULL) {
parent = *p;
- sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+ sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
- if (new->so_server < sp->so_server) {
- p = &parent->rb_left;
- continue;
- }
- if (new->so_server > sp->so_server) {
- p = &parent->rb_right;
- continue;
- }
if (new->so_cred < sp->so_cred)
p = &parent->rb_left;
else if (new->so_cred > sp->so_cred)
@@ -346,18 +412,21 @@ nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
return sp;
}
}
- nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64);
- rb_link_node(&new->so_client_node, parent, p);
- rb_insert_color(&new->so_client_node, &clp->cl_state_owners);
+ nfs_alloc_unique_id_locked(&server->openowner_id,
+ &new->so_owner_id, 1, 64);
+ rb_link_node(&new->so_server_node, parent, p);
+ rb_insert_color(&new->so_server_node, &server->state_owners);
return new;
}
static void
-nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp)
+nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
{
- if (!RB_EMPTY_NODE(&sp->so_client_node))
- rb_erase(&sp->so_client_node, &clp->cl_state_owners);
- nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id);
+ struct nfs_server *server = sp->so_server;
+
+ if (!RB_EMPTY_NODE(&sp->so_server_node))
+ rb_erase(&sp->so_server_node, &server->state_owners);
+ nfs_free_unique_id(&server->openowner_id, &sp->so_owner_id);
}
/*
@@ -386,23 +455,32 @@ nfs4_alloc_state_owner(void)
static void
nfs4_drop_state_owner(struct nfs4_state_owner *sp)
{
- if (!RB_EMPTY_NODE(&sp->so_client_node)) {
- struct nfs_client *clp = sp->so_server->nfs_client;
+ if (!RB_EMPTY_NODE(&sp->so_server_node)) {
+ struct nfs_server *server = sp->so_server;
+ struct nfs_client *clp = server->nfs_client;
spin_lock(&clp->cl_lock);
- rb_erase(&sp->so_client_node, &clp->cl_state_owners);
- RB_CLEAR_NODE(&sp->so_client_node);
+ rb_erase(&sp->so_server_node, &server->state_owners);
+ RB_CLEAR_NODE(&sp->so_server_node);
spin_unlock(&clp->cl_lock);
}
}
-struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
+/**
+ * nfs4_get_state_owner - Look up a state owner given a credential
+ * @server: nfs_server to search
+ * @cred: RPC credential to match
+ *
+ * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL.
+ */
+struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
+ struct rpc_cred *cred)
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_state_owner *sp, *new;
spin_lock(&clp->cl_lock);
- sp = nfs4_find_state_owner(server, cred);
+ sp = nfs4_find_state_owner_locked(server, cred);
spin_unlock(&clp->cl_lock);
if (sp != NULL)
return sp;
@@ -412,7 +490,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
new->so_server = server;
new->so_cred = cred;
spin_lock(&clp->cl_lock);
- sp = nfs4_insert_state_owner(clp, new);
+ sp = nfs4_insert_state_owner_locked(new);
spin_unlock(&clp->cl_lock);
if (sp == new)
get_rpccred(cred);
@@ -423,6 +501,11 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
return sp;
}
+/**
+ * nfs4_put_state_owner - Release a nfs4_state_owner
+ * @sp: state owner data to release
+ *
+ */
void nfs4_put_state_owner(struct nfs4_state_owner *sp)
{
struct nfs_client *clp = sp->so_server->nfs_client;
@@ -430,7 +513,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
return;
- nfs4_remove_state_owner(clp, sp);
+ nfs4_remove_state_owner_locked(sp);
spin_unlock(&clp->cl_lock);
rpc_destroy_wait_queue(&sp->so_sequence.wait);
put_rpccred(cred);
@@ -585,8 +668,11 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state,
if (!call_close) {
nfs4_put_open_state(state);
nfs4_put_state_owner(owner);
- } else
- nfs4_do_close(path, state, gfp_mask, wait);
+ } else {
+ bool roc = pnfs_roc(state->inode);
+
+ nfs4_do_close(path, state, gfp_mask, wait, roc);
+ }
}
void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
@@ -633,7 +719,8 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_p
static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
{
struct nfs4_lock_state *lsp;
- struct nfs_client *clp = state->owner->so_server->nfs_client;
+ struct nfs_server *server = state->owner->so_server;
+ struct nfs_client *clp = server->nfs_client;
lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
if (lsp == NULL)
@@ -657,7 +744,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
return NULL;
}
spin_lock(&clp->cl_lock);
- nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
+ nfs_alloc_unique_id_locked(&server->lockowner_id, &lsp->ls_id, 1, 64);
spin_unlock(&clp->cl_lock);
INIT_LIST_HEAD(&lsp->ls_locks);
return lsp;
@@ -665,10 +752,11 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
{
- struct nfs_client *clp = lsp->ls_state->owner->so_server->nfs_client;
+ struct nfs_server *server = lsp->ls_state->owner->so_server;
+ struct nfs_client *clp = server->nfs_client;
spin_lock(&clp->cl_lock);
- nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
+ nfs_free_unique_id(&server->lockowner_id, &lsp->ls_id);
spin_unlock(&clp->cl_lock);
rpc_destroy_wait_queue(&lsp->ls_sequence.wait);
kfree(lsp);
@@ -1114,15 +1202,19 @@ static void nfs4_clear_open_state(struct nfs4_state *state)
}
}
-static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
+static void nfs4_reset_seqids(struct nfs_server *server,
+ int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
{
+ struct nfs_client *clp = server->nfs_client;
struct nfs4_state_owner *sp;
struct rb_node *pos;
struct nfs4_state *state;
- /* Reset all sequence ids to zero */
- for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
- sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+ spin_lock(&clp->cl_lock);
+ for (pos = rb_first(&server->state_owners);
+ pos != NULL;
+ pos = rb_next(pos)) {
+ sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
sp->so_seqid.flags = 0;
spin_lock(&sp->so_lock);
list_for_each_entry(state, &sp->so_states, open_states) {
@@ -1131,6 +1223,18 @@ static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_re
}
spin_unlock(&sp->so_lock);
}
+ spin_unlock(&clp->cl_lock);
+}
+
+static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp,
+ int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
+{
+ struct nfs_server *server;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+ nfs4_reset_seqids(server, mark_reclaim);
+ rcu_read_unlock();
}
static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
@@ -1148,25 +1252,41 @@ static void nfs4_reclaim_complete(struct nfs_client *clp,
(void)ops->reclaim_complete(clp);
}
-static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
+static void nfs4_clear_reclaim_server(struct nfs_server *server)
{
+ struct nfs_client *clp = server->nfs_client;
struct nfs4_state_owner *sp;
struct rb_node *pos;
struct nfs4_state *state;
- if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
- return 0;
-
- for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
- sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+ spin_lock(&clp->cl_lock);
+ for (pos = rb_first(&server->state_owners);
+ pos != NULL;
+ pos = rb_next(pos)) {
+ sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
spin_lock(&sp->so_lock);
list_for_each_entry(state, &sp->so_states, open_states) {
- if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags))
+ if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT,
+ &state->flags))
continue;
nfs4_state_mark_reclaim_nograce(clp, state);
}
spin_unlock(&sp->so_lock);
}
+ spin_unlock(&clp->cl_lock);
+}
+
+static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
+{
+ struct nfs_server *server;
+
+ if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+ return 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+ nfs4_clear_reclaim_server(server);
+ rcu_read_unlock();
nfs_delegation_reap_unclaimed(clp);
return 1;
@@ -1238,27 +1358,40 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops)
{
+ struct nfs4_state_owner *sp;
+ struct nfs_server *server;
struct rb_node *pos;
int status = 0;
restart:
- spin_lock(&clp->cl_lock);
- for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
- struct nfs4_state_owner *sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
- if (!test_and_clear_bit(ops->owner_flag_bit, &sp->so_flags))
- continue;
- atomic_inc(&sp->so_count);
- spin_unlock(&clp->cl_lock);
- status = nfs4_reclaim_open_state(sp, ops);
- if (status < 0) {
- set_bit(ops->owner_flag_bit, &sp->so_flags);
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ spin_lock(&clp->cl_lock);
+ for (pos = rb_first(&server->state_owners);
+ pos != NULL;
+ pos = rb_next(pos)) {
+ sp = rb_entry(pos,
+ struct nfs4_state_owner, so_server_node);
+ if (!test_and_clear_bit(ops->owner_flag_bit,
+ &sp->so_flags))
+ continue;
+ atomic_inc(&sp->so_count);
+ spin_unlock(&clp->cl_lock);
+ rcu_read_unlock();
+
+ status = nfs4_reclaim_open_state(sp, ops);
+ if (status < 0) {
+ set_bit(ops->owner_flag_bit, &sp->so_flags);
+ nfs4_put_state_owner(sp);
+ return nfs4_recovery_handle_error(clp, status);
+ }
+
nfs4_put_state_owner(sp);
- return nfs4_recovery_handle_error(clp, status);
+ goto restart;
}
- nfs4_put_state_owner(sp);
- goto restart;
+ spin_unlock(&clp->cl_lock);
}
- spin_unlock(&clp->cl_lock);
+ rcu_read_unlock();
return status;
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index f313c4c..2ab8e5c 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -71,8 +71,8 @@ static int nfs4_stat_to_errno(int);
/* lock,open owner id:
* we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2)
*/
-#define open_owner_id_maxsz (1 + 4)
-#define lock_owner_id_maxsz (1 + 4)
+#define open_owner_id_maxsz (1 + 1 + 4)
+#define lock_owner_id_maxsz (1 + 1 + 4)
#define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
#define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
@@ -1088,10 +1088,11 @@ static void encode_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lo
{
__be32 *p;
- p = reserve_space(xdr, 28);
+ p = reserve_space(xdr, 32);
p = xdr_encode_hyper(p, lowner->clientid);
- *p++ = cpu_to_be32(16);
+ *p++ = cpu_to_be32(20);
p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+ *p++ = cpu_to_be32(lowner->s_dev);
xdr_encode_hyper(p, lowner->id);
}
@@ -1210,10 +1211,11 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
*p++ = cpu_to_be32(OP_OPEN);
*p = cpu_to_be32(arg->seqid->sequence->counter);
encode_share_access(xdr, arg->fmode);
- p = reserve_space(xdr, 28);
+ p = reserve_space(xdr, 32);
p = xdr_encode_hyper(p, arg->clientid);
- *p++ = cpu_to_be32(16);
+ *p++ = cpu_to_be32(20);
p = xdr_encode_opaque_fixed(p, "open id:", 8);
+ *p++ = cpu_to_be32(arg->server->s_dev);
xdr_encode_hyper(p, arg->id);
}
@@ -1510,7 +1512,7 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
hdr->replen += decode_restorefh_maxsz;
}
-static int
+static void
encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compound_hdr *hdr)
{
__be32 *p;
@@ -1521,14 +1523,12 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
p = reserve_space(xdr, 2*4);
*p++ = cpu_to_be32(1);
*p = cpu_to_be32(FATTR4_WORD0_ACL);
- if (arg->acl_len % 4)
- return -EINVAL;
+ BUG_ON(arg->acl_len % 4);
p = reserve_space(xdr, 4);
*p = cpu_to_be32(arg->acl_len);
xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
hdr->nops++;
hdr->replen += decode_setacl_maxsz;
- return 0;
}
static void
@@ -1789,7 +1789,6 @@ encode_layoutget(struct xdr_stream *xdr,
const struct nfs4_layoutget_args *args,
struct compound_hdr *hdr)
{
- nfs4_stateid stateid;
__be32 *p;
p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
@@ -1800,9 +1799,7 @@ encode_layoutget(struct xdr_stream *xdr,
p = xdr_encode_hyper(p, args->range.offset);
p = xdr_encode_hyper(p, args->range.length);
p = xdr_encode_hyper(p, args->minlength);
- pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
- args->ctx->state);
- p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE);
+ p = xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE);
*p = cpu_to_be32(args->maxcount);
dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
@@ -1833,393 +1830,362 @@ static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args)
/*
* Encode an ACCESS request
*/
-static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs4_accessargs *args)
+static void nfs4_xdr_enc_access(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_accessargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_access(&xdr, args->access, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_access(xdr, args->access, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode LOOKUP request
*/
-static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, __be32 *p, const struct nfs4_lookup_arg *args)
+static void nfs4_xdr_enc_lookup(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_lookup_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->dir_fh, &hdr);
- encode_lookup(&xdr, args->name, &hdr);
- encode_getfh(&xdr, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->dir_fh, &hdr);
+ encode_lookup(xdr, args->name, &hdr);
+ encode_getfh(xdr, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode LOOKUP_ROOT request
*/
-static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, __be32 *p, const struct nfs4_lookup_root_arg *args)
+static void nfs4_xdr_enc_lookup_root(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs4_lookup_root_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putrootfh(&xdr, &hdr);
- encode_getfh(&xdr, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putrootfh(xdr, &hdr);
+ encode_getfh(xdr, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode REMOVE request
*/
-static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args)
+static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs_removeargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_remove(&xdr, &args->name, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_remove(xdr, &args->name, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode RENAME request
*/
-static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs_renameargs *args)
+static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs_renameargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->old_dir, &hdr);
- encode_savefh(&xdr, &hdr);
- encode_putfh(&xdr, args->new_dir, &hdr);
- encode_rename(&xdr, args->old_name, args->new_name, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
- encode_restorefh(&xdr, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->old_dir, &hdr);
+ encode_savefh(xdr, &hdr);
+ encode_putfh(xdr, args->new_dir, &hdr);
+ encode_rename(xdr, args->old_name, args->new_name, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
+ encode_restorefh(xdr, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode LINK request
*/
-static int nfs4_xdr_enc_link(struct rpc_rqst *req, __be32 *p, const struct nfs4_link_arg *args)
+static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_link_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_savefh(&xdr, &hdr);
- encode_putfh(&xdr, args->dir_fh, &hdr);
- encode_link(&xdr, args->name, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
- encode_restorefh(&xdr, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_savefh(xdr, &hdr);
+ encode_putfh(xdr, args->dir_fh, &hdr);
+ encode_link(xdr, args->name, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
+ encode_restorefh(xdr, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode CREATE request
*/
-static int nfs4_xdr_enc_create(struct rpc_rqst *req, __be32 *p, const struct nfs4_create_arg *args)
+static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_create_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->dir_fh, &hdr);
- encode_savefh(&xdr, &hdr);
- encode_create(&xdr, args, &hdr);
- encode_getfh(&xdr, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
- encode_restorefh(&xdr, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->dir_fh, &hdr);
+ encode_savefh(xdr, &hdr);
+ encode_create(xdr, args, &hdr);
+ encode_getfh(xdr, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
+ encode_restorefh(xdr, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode SYMLINK request
*/
-static int nfs4_xdr_enc_symlink(struct rpc_rqst *req, __be32 *p, const struct nfs4_create_arg *args)
+static void nfs4_xdr_enc_symlink(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_create_arg *args)
{
- return nfs4_xdr_enc_create(req, p, args);
+ nfs4_xdr_enc_create(req, xdr, args);
}
/*
* Encode GETATTR request
*/
-static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nfs4_getattr_arg *args)
+static void nfs4_xdr_enc_getattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_getattr_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode a CLOSE request
*/
-static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args)
+static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_closeargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_close(&xdr, args, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_close(xdr, args, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode an OPEN request
*/
-static int nfs4_xdr_enc_open(struct rpc_rqst *req, __be32 *p, struct nfs_openargs *args)
+static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_openargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_savefh(&xdr, &hdr);
- encode_open(&xdr, args, &hdr);
- encode_getfh(&xdr, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
- encode_restorefh(&xdr, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_savefh(xdr, &hdr);
+ encode_open(xdr, args, &hdr);
+ encode_getfh(xdr, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
+ encode_restorefh(xdr, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode an OPEN_CONFIRM request
*/
-static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_open_confirmargs *args)
+static void nfs4_xdr_enc_open_confirm(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_open_confirmargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.nops = 0,
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_open_confirm(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_open_confirm(xdr, args, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode an OPEN request with no attributes.
*/
-static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, __be32 *p, struct nfs_openargs *args)
+static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_openargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_open(&xdr, args, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_open(xdr, args, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode an OPEN_DOWNGRADE request
*/
-static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args)
+static void nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_closeargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_open_downgrade(&xdr, args, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_open_downgrade(xdr, args, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode a LOCK request
*/
-static int nfs4_xdr_enc_lock(struct rpc_rqst *req, __be32 *p, struct nfs_lock_args *args)
+static void nfs4_xdr_enc_lock(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_lock_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_lock(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_lock(xdr, args, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode a LOCKT request
*/
-static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, __be32 *p, struct nfs_lockt_args *args)
+static void nfs4_xdr_enc_lockt(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_lockt_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_lockt(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_lockt(xdr, args, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode a LOCKU request
*/
-static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_args *args)
+static void nfs4_xdr_enc_locku(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_locku_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_locku(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_locku(xdr, args, &hdr);
encode_nops(&hdr);
- return 0;
}
-static int nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req, __be32 *p, struct nfs_release_lockowner_args *args)
+static void nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_release_lockowner_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = 0,
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_release_lockowner(&xdr, &args->lock_owner, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_release_lockowner(xdr, &args->lock_owner, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode a READLINK request
*/
-static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct nfs4_readlink *args)
+static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_readlink *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_readlink(&xdr, args, req, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_readlink(xdr, args, req, &hdr);
xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
args->pgbase, args->pglen);
encode_nops(&hdr);
- return 0;
}
/*
* Encode a READDIR request
*/
-static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nfs4_readdir_arg *args)
+static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_readdir_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_readdir(&xdr, args, req, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_readdir(xdr, args, req, &hdr);
xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
args->pgbase, args->count);
@@ -2227,428 +2193,387 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf
__func__, hdr.replen << 2, args->pages,
args->pgbase, args->count);
encode_nops(&hdr);
- return 0;
}
/*
* Encode a READ request
*/
-static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
+static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_readargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_read(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_read(xdr, args, &hdr);
xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
args->pages, args->pgbase, args->count);
req->rq_rcv_buf.flags |= XDRBUF_READ;
encode_nops(&hdr);
- return 0;
}
/*
* Encode an SETATTR request
*/
-static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_setattrargs *args)
+static void nfs4_xdr_enc_setattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_setattrargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_setattr(&xdr, args, args->server, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_setattr(xdr, args, args->server, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode a GETACL request
*/
-static int
-nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p,
- struct nfs_getaclargs *args)
+static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_getaclargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
uint32_t replen;
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1;
- encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr);
+ encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
args->acl_pages, args->acl_pgbase, args->acl_len);
encode_nops(&hdr);
- return 0;
}
/*
* Encode a WRITE request
*/
-static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
+static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_writeargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_write(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_write(xdr, args, &hdr);
req->rq_snd_buf.flags |= XDRBUF_WRITE;
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a COMMIT request
*/
-static int nfs4_xdr_enc_commit(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
+static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_writeargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_commit(&xdr, args, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_commit(xdr, args, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* FSINFO request
*/
-static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs4_fsinfo_arg *args)
+static void nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs4_fsinfo_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_fsinfo(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_fsinfo(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a PATHCONF request
*/
-static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, __be32 *p, const struct nfs4_pathconf_arg *args)
+static void nfs4_xdr_enc_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_pathconf_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_getattr_one(&xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0],
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_getattr_one(xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0],
&hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a STATFS request
*/
-static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs4_statfs_arg *args)
+static void nfs4_xdr_enc_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_statfs_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_getattr_two(&xdr, args->bitmask[0] & nfs4_statfs_bitmap[0],
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_getattr_two(xdr, args->bitmask[0] & nfs4_statfs_bitmap[0],
args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* GETATTR_BITMAP request
*/
-static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p,
- struct nfs4_server_caps_arg *args)
+static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_server_caps_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fhandle, &hdr);
- encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fhandle, &hdr);
+ encode_getattr_one(xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
FATTR4_WORD0_LINK_SUPPORT|
FATTR4_WORD0_SYMLINK_SUPPORT|
FATTR4_WORD0_ACLSUPPORT, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a RENEW request
*/
-static int nfs4_xdr_enc_renew(struct rpc_rqst *req, __be32 *p, struct nfs_client *clp)
+static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_client *clp)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.nops = 0,
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_renew(&xdr, clp, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_renew(xdr, clp, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a SETCLIENTID request
*/
-static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid *sc)
+static void nfs4_xdr_enc_setclientid(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_setclientid *sc)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.nops = 0,
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_setclientid(&xdr, sc, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_setclientid(xdr, sc, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a SETCLIENTID_CONFIRM request
*/
-static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid_res *arg)
+static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_setclientid_res *arg)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.nops = 0,
};
const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_setclientid_confirm(&xdr, arg, &hdr);
- encode_putrootfh(&xdr, &hdr);
- encode_fsinfo(&xdr, lease_bitmap, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_setclientid_confirm(xdr, arg, &hdr);
+ encode_putrootfh(xdr, &hdr);
+ encode_fsinfo(xdr, lease_bitmap, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* DELEGRETURN request
*/
-static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, __be32 *p, const struct nfs4_delegreturnargs *args)
+static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs4_delegreturnargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fhandle, &hdr);
- encode_delegreturn(&xdr, args->stateid, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fhandle, &hdr);
+ encode_delegreturn(xdr, args->stateid, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode FS_LOCATIONS request
*/
-static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs4_fs_locations_arg *args)
+static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_fs_locations_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
uint32_t replen;
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->dir_fh, &hdr);
- encode_lookup(&xdr, args->name, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->dir_fh, &hdr);
+ encode_lookup(xdr, args->name, &hdr);
replen = hdr.replen; /* get the attribute into args->page */
- encode_fs_locations(&xdr, args->bitmask, &hdr);
+ encode_fs_locations(xdr, args->bitmask, &hdr);
xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page,
0, PAGE_SIZE);
encode_nops(&hdr);
- return 0;
}
#if defined(CONFIG_NFS_V4_1)
/*
* EXCHANGE_ID request
*/
-static int nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, uint32_t *p,
- struct nfs41_exchange_id_args *args)
+static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs41_exchange_id_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = args->client->cl_mvops->minor_version,
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_exchange_id(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_exchange_id(xdr, args, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a CREATE_SESSION request
*/
-static int nfs4_xdr_enc_create_session(struct rpc_rqst *req, uint32_t *p,
- struct nfs41_create_session_args *args)
+static void nfs4_xdr_enc_create_session(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs41_create_session_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = args->client->cl_mvops->minor_version,
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_create_session(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_create_session(xdr, args, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a DESTROY_SESSION request
*/
-static int nfs4_xdr_enc_destroy_session(struct rpc_rqst *req, uint32_t *p,
- struct nfs4_session *session)
+static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_session *session)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = session->clp->cl_mvops->minor_version,
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_destroy_session(&xdr, session, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_destroy_session(xdr, session, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a SEQUENCE request
*/
-static int nfs4_xdr_enc_sequence(struct rpc_rqst *req, uint32_t *p,
- struct nfs4_sequence_args *args)
+static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs4_sequence_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, args, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a GET_LEASE_TIME request
*/
-static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p,
- struct nfs4_get_lease_time_args *args)
+static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_get_lease_time_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->la_seq_args),
};
const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->la_seq_args, &hdr);
- encode_putrootfh(&xdr, &hdr);
- encode_fsinfo(&xdr, lease_bitmap, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->la_seq_args, &hdr);
+ encode_putrootfh(xdr, &hdr);
+ encode_fsinfo(xdr, lease_bitmap, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a RECLAIM_COMPLETE request
*/
-static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p,
- struct nfs41_reclaim_complete_args *args)
+static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs41_reclaim_complete_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args)
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_reclaim_complete(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_reclaim_complete(xdr, args, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode GETDEVICEINFO request
*/
-static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
- struct nfs4_getdeviceinfo_args *args)
+static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_getdeviceinfo_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_getdeviceinfo(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_getdeviceinfo(xdr, args, &hdr);
/* set up reply kvec. Subtract notification bitmap max size (2)
* so that notification bitmap is put in xdr_buf tail */
@@ -2657,27 +2582,24 @@ static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
args->pdev->pglen);
encode_nops(&hdr);
- return 0;
}
/*
* Encode LAYOUTGET request
*/
-static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
- struct nfs4_layoutget_args *args)
+static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_layoutget_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
- encode_layoutget(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, NFS_FH(args->inode), &hdr);
+ encode_layoutget(xdr, args, &hdr);
encode_nops(&hdr);
- return 0;
}
#endif /* CONFIG_NFS_V4_1 */
@@ -4475,7 +4397,7 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_
goto out_overflow;
eof = be32_to_cpup(p++);
count = be32_to_cpup(p);
- hdrlen = (u8 *) p - (u8 *) iov->iov_base;
+ hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base;
recvd = req->rq_rcv_buf.len - hdrlen;
if (count > recvd) {
dprintk("NFS: server cheating in read reply: "
@@ -4518,7 +4440,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
xdr_read_pages(xdr, pglen);
- return 0;
+ return pglen;
}
static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
@@ -5000,7 +4922,7 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
goto out_overflow;
len = be32_to_cpup(p);
if (len) {
- int i;
+ uint32_t i;
p = xdr_inline_decode(xdr, 4 * len);
if (unlikely(!p))
@@ -5090,26 +5012,26 @@ out_overflow:
/*
* Decode OPEN_DOWNGRADE response
*/
-static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res)
+static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs_closeres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_open_downgrade(&xdr, res);
+ status = decode_open_downgrade(xdr, res);
if (status != 0)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server,
+ decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5118,26 +5040,25 @@ out:
/*
* Decode ACCESS response
*/
-static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_accessres *res)
+static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs4_accessres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status != 0)
goto out;
- status = decode_access(&xdr, res);
+ status = decode_access(xdr, res);
if (status != 0)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server,
+ decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5146,26 +5067,28 @@ out:
/*
* Decode LOOKUP response
*/
-static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lookup_res *res)
+static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs4_lookup_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- if ((status = decode_putfh(&xdr)) != 0)
+ status = decode_putfh(xdr);
+ if (status)
goto out;
- if ((status = decode_lookup(&xdr)) != 0)
+ status = decode_lookup(xdr);
+ if (status)
goto out;
- if ((status = decode_getfh(&xdr, res->fh)) != 0)
+ status = decode_getfh(xdr, res->fh);
+ if (status)
goto out;
- status = decode_getfattr(&xdr, res->fattr, res->server
+ status = decode_getfattr(xdr, res->fattr, res->server
,!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5174,23 +5097,25 @@ out:
/*
* Decode LOOKUP_ROOT response
*/
-static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lookup_res *res)
+static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs4_lookup_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- if ((status = decode_putrootfh(&xdr)) != 0)
+ status = decode_putrootfh(xdr);
+ if (status)
goto out;
- if ((status = decode_getfh(&xdr, res->fh)) == 0)
- status = decode_getfattr(&xdr, res->fattr, res->server,
+ status = decode_getfh(xdr, res->fh);
+ if (status == 0)
+ status = decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5199,24 +5124,25 @@ out:
/*
* Decode REMOVE response
*/
-static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_removeres *res)
+static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_removeres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- if ((status = decode_putfh(&xdr)) != 0)
+ status = decode_putfh(xdr);
+ if (status)
goto out;
- if ((status = decode_remove(&xdr, &res->cinfo)) != 0)
+ status = decode_remove(xdr, &res->cinfo);
+ if (status)
goto out;
- decode_getfattr(&xdr, res->dir_attr, res->server,
+ decode_getfattr(xdr, res->dir_attr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5225,34 +5151,38 @@ out:
/*
* Decode RENAME response
*/
-static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs_renameres *res)
+static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_renameres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- if ((status = decode_putfh(&xdr)) != 0)
+ status = decode_putfh(xdr);
+ if (status)
goto out;
- if ((status = decode_savefh(&xdr)) != 0)
+ status = decode_savefh(xdr);
+ if (status)
goto out;
- if ((status = decode_putfh(&xdr)) != 0)
+ status = decode_putfh(xdr);
+ if (status)
goto out;
- if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0)
+ status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo);
+ if (status)
goto out;
/* Current FH is target directory */
- if (decode_getfattr(&xdr, res->new_fattr, res->server,
+ if (decode_getfattr(xdr, res->new_fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
goto out;
- if ((status = decode_restorefh(&xdr)) != 0)
+ status = decode_restorefh(xdr);
+ if (status)
goto out;
- decode_getfattr(&xdr, res->old_fattr, res->server,
+ decode_getfattr(xdr, res->old_fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5261,37 +5191,41 @@ out:
/*
* Decode LINK response
*/
-static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link_res *res)
+static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs4_link_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- if ((status = decode_putfh(&xdr)) != 0)
+ status = decode_putfh(xdr);
+ if (status)
goto out;
- if ((status = decode_savefh(&xdr)) != 0)
+ status = decode_savefh(xdr);
+ if (status)
goto out;
- if ((status = decode_putfh(&xdr)) != 0)
+ status = decode_putfh(xdr);
+ if (status)
goto out;
- if ((status = decode_link(&xdr, &res->cinfo)) != 0)
+ status = decode_link(xdr, &res->cinfo);
+ if (status)
goto out;
/*
* Note order: OP_LINK leaves the directory as the current
* filehandle.
*/
- if (decode_getfattr(&xdr, res->dir_attr, res->server,
+ if (decode_getfattr(xdr, res->dir_attr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
goto out;
- if ((status = decode_restorefh(&xdr)) != 0)
+ status = decode_restorefh(xdr);
+ if (status)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server,
+ decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5300,33 +5234,37 @@ out:
/*
* Decode CREATE response
*/
-static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_create_res *res)
+static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs4_create_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- if ((status = decode_putfh(&xdr)) != 0)
+ status = decode_putfh(xdr);
+ if (status)
goto out;
- if ((status = decode_savefh(&xdr)) != 0)
+ status = decode_savefh(xdr);
+ if (status)
goto out;
- if ((status = decode_create(&xdr,&res->dir_cinfo)) != 0)
+ status = decode_create(xdr, &res->dir_cinfo);
+ if (status)
goto out;
- if ((status = decode_getfh(&xdr, res->fh)) != 0)
+ status = decode_getfh(xdr, res->fh);
+ if (status)
goto out;
- if (decode_getfattr(&xdr, res->fattr, res->server,
+ if (decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
goto out;
- if ((status = decode_restorefh(&xdr)) != 0)
+ status = decode_restorefh(xdr);
+ if (status)
goto out;
- decode_getfattr(&xdr, res->dir_fattr, res->server,
+ decode_getfattr(xdr, res->dir_fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5335,31 +5273,31 @@ out:
/*
* Decode SYMLINK response
*/
-static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_create_res *res)
+static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs4_create_res *res)
{
- return nfs4_xdr_dec_create(rqstp, p, res);
+ return nfs4_xdr_dec_create(rqstp, xdr, res);
}
/*
* Decode GETATTR response
*/
-static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_getattr_res *res)
+static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs4_getattr_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_getfattr(&xdr, res->fattr, res->server,
+ status = decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5368,46 +5306,40 @@ out:
/*
* Encode an SETACL request
*/
-static int
-nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args)
+static void nfs4_xdr_enc_setacl(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_setaclargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- int status;
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- status = encode_setacl(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_setacl(xdr, args, &hdr);
encode_nops(&hdr);
- return status;
}
/*
* Decode SETACL response
*/
static int
-nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p,
+nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
struct nfs_setaclres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_setattr(&xdr);
+ status = decode_setattr(xdr);
out:
return status;
}
@@ -5416,24 +5348,22 @@ out:
* Decode GETACL response
*/
static int
-nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p,
+nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
struct nfs_getaclres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_getacl(&xdr, rqstp, &res->acl_len);
+ status = decode_getacl(xdr, rqstp, &res->acl_len);
out:
return status;
@@ -5442,23 +5372,22 @@ out:
/*
* Decode CLOSE response
*/
-static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res)
+static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_closeres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_close(&xdr, res);
+ status = decode_close(xdr, res);
if (status != 0)
goto out;
/*
@@ -5467,7 +5396,7 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos
* an ESTALE error. Shouldn't be a problem,
* though, since fattr->valid will remain unset.
*/
- decode_getfattr(&xdr, res->fattr, res->server,
+ decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5476,36 +5405,35 @@ out:
/*
* Decode OPEN response
*/
-static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res)
+static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_openres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_savefh(&xdr);
+ status = decode_savefh(xdr);
if (status)
goto out;
- status = decode_open(&xdr, res);
+ status = decode_open(xdr, res);
if (status)
goto out;
- if (decode_getfh(&xdr, &res->fh) != 0)
+ if (decode_getfh(xdr, &res->fh) != 0)
goto out;
- if (decode_getfattr(&xdr, res->f_attr, res->server,
+ if (decode_getfattr(xdr, res->f_attr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
goto out;
- if (decode_restorefh(&xdr) != 0)
+ if (decode_restorefh(xdr) != 0)
goto out;
- decode_getfattr(&xdr, res->dir_attr, res->server,
+ decode_getfattr(xdr, res->dir_attr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5514,20 +5442,20 @@ out:
/*
* Decode OPEN_CONFIRM response
*/
-static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, __be32 *p, struct nfs_open_confirmres *res)
+static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs_open_confirmres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_open_confirm(&xdr, res);
+ status = decode_open_confirm(xdr, res);
out:
return status;
}
@@ -5535,26 +5463,26 @@ out:
/*
* Decode OPEN response
*/
-static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res)
+static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs_openres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_open(&xdr, res);
+ status = decode_open(xdr, res);
if (status)
goto out;
- decode_getfattr(&xdr, res->f_attr, res->server,
+ decode_getfattr(xdr, res->f_attr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5563,26 +5491,26 @@ out:
/*
* Decode SETATTR response
*/
-static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_setattrres *res)
+static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs_setattrres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_setattr(&xdr);
+ status = decode_setattr(xdr);
if (status)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server,
+ decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5591,23 +5519,22 @@ out:
/*
* Decode LOCK response
*/
-static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock_res *res)
+static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_lock_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_lock(&xdr, res);
+ status = decode_lock(xdr, res);
out:
return status;
}
@@ -5615,23 +5542,22 @@ out:
/*
* Decode LOCKT response
*/
-static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lockt_res *res)
+static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_lockt_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_lockt(&xdr, res);
+ status = decode_lockt(xdr, res);
out:
return status;
}
@@ -5639,61 +5565,58 @@ out:
/*
* Decode LOCKU response
*/
-static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, __be32 *p, struct nfs_locku_res *res)
+static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_locku_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_locku(&xdr, res);
+ status = decode_locku(xdr, res);
out:
return status;
}
-static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr, void *dummy)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_release_lockowner(&xdr);
+ status = decode_release_lockowner(xdr);
return status;
}
/*
* Decode READLINK response
*/
-static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p,
+static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
struct nfs4_readlink_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_readlink(&xdr, rqstp);
+ status = decode_readlink(xdr, rqstp);
out:
return status;
}
@@ -5701,23 +5624,22 @@ out:
/*
* Decode READDIR response
*/
-static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_readdir_res *res)
+static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs4_readdir_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_readdir(&xdr, rqstp, res);
+ status = decode_readdir(xdr, rqstp, res);
out:
return status;
}
@@ -5725,23 +5647,22 @@ out:
/*
* Decode Read response
*/
-static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, __be32 *p, struct nfs_readres *res)
+static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_readres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_read(&xdr, rqstp, res);
+ status = decode_read(xdr, rqstp, res);
if (!status)
status = res->count;
out:
@@ -5751,26 +5672,25 @@ out:
/*
* Decode WRITE response
*/
-static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writeres *res)
+static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_writeres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_write(&xdr, res);
+ status = decode_write(xdr, res);
if (status)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server,
+ decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
if (!status)
status = res->count;
@@ -5781,26 +5701,25 @@ out:
/*
* Decode COMMIT response
*/
-static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writeres *res)
+static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_writeres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_commit(&xdr, res);
+ status = decode_commit(xdr, res);
if (status)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server,
+ decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5809,85 +5728,80 @@ out:
/*
* Decode FSINFO response
*/
-static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p,
+static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
struct nfs4_fsinfo_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_sequence(&xdr, &res->seq_res, req);
+ status = decode_sequence(xdr, &res->seq_res, req);
if (!status)
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (!status)
- status = decode_fsinfo(&xdr, res->fsinfo);
+ status = decode_fsinfo(xdr, res->fsinfo);
return status;
}
/*
* Decode PATHCONF response
*/
-static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p,
+static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
struct nfs4_pathconf_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_sequence(&xdr, &res->seq_res, req);
+ status = decode_sequence(xdr, &res->seq_res, req);
if (!status)
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (!status)
- status = decode_pathconf(&xdr, res->pathconf);
+ status = decode_pathconf(xdr, res->pathconf);
return status;
}
/*
* Decode STATFS response
*/
-static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p,
+static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
struct nfs4_statfs_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_sequence(&xdr, &res->seq_res, req);
+ status = decode_sequence(xdr, &res->seq_res, req);
if (!status)
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (!status)
- status = decode_statfs(&xdr, res->fsstat);
+ status = decode_statfs(xdr, res->fsstat);
return status;
}
/*
* Decode GETATTR_BITMAP response
*/
-static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, __be32 *p, struct nfs4_server_caps_res *res)
+static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_server_caps_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, req);
+ status = decode_sequence(xdr, &res->seq_res, req);
if (status)
goto out;
- if ((status = decode_putfh(&xdr)) != 0)
+ status = decode_putfh(xdr);
+ if (status)
goto out;
- status = decode_server_caps(&xdr, res);
+ status = decode_server_caps(xdr, res);
out:
return status;
}
@@ -5895,79 +5809,77 @@ out:
/*
* Decode RENEW response
*/
-static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ void *__unused)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_renew(&xdr);
+ status = decode_renew(xdr);
return status;
}
/*
* Decode SETCLIENTID response
*/
-static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
- struct nfs4_setclientid_res *res)
+static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_setclientid_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_setclientid(&xdr, res);
+ status = decode_setclientid(xdr, res);
return status;
}
/*
* Decode SETCLIENTID_CONFIRM response
*/
-static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *fsinfo)
+static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_fsinfo *fsinfo)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_setclientid_confirm(&xdr);
+ status = decode_setclientid_confirm(xdr);
if (!status)
- status = decode_putrootfh(&xdr);
+ status = decode_putrootfh(xdr);
if (!status)
- status = decode_fsinfo(&xdr, fsinfo);
+ status = decode_fsinfo(xdr, fsinfo);
return status;
}
/*
* Decode DELEGRETURN response
*/
-static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_delegreturnres *res)
+static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs4_delegreturnres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status != 0)
goto out;
- status = decode_delegreturn(&xdr);
+ status = decode_delegreturn(xdr);
if (status != 0)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server,
+ decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5976,26 +5888,27 @@ out:
/*
* Decode FS_LOCATIONS response
*/
-static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p,
+static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
struct nfs4_fs_locations_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, req);
+ status = decode_sequence(xdr, &res->seq_res, req);
if (status)
goto out;
- if ((status = decode_putfh(&xdr)) != 0)
+ status = decode_putfh(xdr);
+ if (status)
goto out;
- if ((status = decode_lookup(&xdr)) != 0)
+ status = decode_lookup(xdr);
+ if (status)
goto out;
- xdr_enter_page(&xdr, PAGE_SIZE);
- status = decode_getfattr(&xdr, &res->fs_locations->fattr,
+ xdr_enter_page(xdr, PAGE_SIZE);
+ status = decode_getfattr(xdr, &res->fs_locations->fattr,
res->fs_locations->server,
!RPC_IS_ASYNC(req->rq_task));
out:
@@ -6006,129 +5919,122 @@ out:
/*
* Decode EXCHANGE_ID response
*/
-static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
void *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_exchange_id(&xdr, res);
+ status = decode_exchange_id(xdr, res);
return status;
}
/*
* Decode CREATE_SESSION response
*/
-static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
struct nfs41_create_session_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_create_session(&xdr, res);
+ status = decode_create_session(xdr, res);
return status;
}
/*
* Decode DESTROY_SESSION response
*/
-static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp, uint32_t *p,
- void *dummy)
+static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_destroy_session(&xdr, dummy);
+ status = decode_destroy_session(xdr, res);
return status;
}
/*
* Decode SEQUENCE response
*/
-static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
struct nfs4_sequence_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_sequence(&xdr, res, rqstp);
+ status = decode_sequence(xdr, res, rqstp);
return status;
}
/*
* Decode GET_LEASE_TIME response
*/
-static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
struct nfs4_get_lease_time_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_sequence(&xdr, &res->lr_seq_res, rqstp);
+ status = decode_sequence(xdr, &res->lr_seq_res, rqstp);
if (!status)
- status = decode_putrootfh(&xdr);
+ status = decode_putrootfh(xdr);
if (!status)
- status = decode_fsinfo(&xdr, res->lr_fsinfo);
+ status = decode_fsinfo(xdr, res->lr_fsinfo);
return status;
}
/*
* Decode RECLAIM_COMPLETE response
*/
-static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
struct nfs41_reclaim_complete_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (!status)
- status = decode_reclaim_complete(&xdr, (void *)NULL);
+ status = decode_reclaim_complete(xdr, (void *)NULL);
return status;
}
/*
* Decode GETDEVINFO response
*/
-static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
struct nfs4_getdeviceinfo_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status != 0)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status != 0)
goto out;
- status = decode_getdeviceinfo(&xdr, res->pdev);
+ status = decode_getdeviceinfo(xdr, res->pdev);
out:
return status;
}
@@ -6136,31 +6042,44 @@ out:
/*
* Decode LAYOUTGET response
*/
-static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
struct nfs4_layoutget_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_layoutget(&xdr, rqstp, res);
+ status = decode_layoutget(xdr, rqstp, res);
out:
return status;
}
#endif /* CONFIG_NFS_V4_1 */
-__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
- struct nfs_server *server, int plus)
+/**
+ * nfs4_decode_dirent - Decode a single NFSv4 directory entry stored in
+ * the local page cache.
+ * @xdr: XDR stream where entry resides
+ * @entry: buffer to fill in with entry data
+ * @plus: boolean indicating whether this should be a readdirplus entry
+ *
+ * Returns zero if successful, otherwise a negative errno value is
+ * returned.
+ *
+ * This function is not invoked during READDIR reply decoding, but
+ * rather whenever an application invokes the getdents(2) system call
+ * on a directory already in our cache.
+ */
+int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
+ int plus)
{
uint32_t bitmap[2] = {0};
uint32_t len;
@@ -6172,9 +6091,9 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
if (unlikely(!p))
goto out_overflow;
if (!ntohl(*p++))
- return ERR_PTR(-EAGAIN);
+ return -EAGAIN;
entry->eof = 1;
- return ERR_PTR(-EBADCOOKIE);
+ return -EBADCOOKIE;
}
p = xdr_inline_decode(xdr, 12);
@@ -6203,25 +6122,24 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
if (decode_attr_length(xdr, &len, &p) < 0)
goto out_overflow;
- if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, server, 1) < 0)
+ if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
+ entry->server, 1) < 0)
goto out_overflow;
if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
entry->ino = entry->fattr->fileid;
+ entry->d_type = DT_UNKNOWN;
+ if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE)
+ entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
+
if (verify_attr_len(xdr, p, len) < 0)
goto out_overflow;
- p = xdr_inline_peek(xdr, 8);
- if (p != NULL)
- entry->eof = !p[0] && p[1];
- else
- entry->eof = 0;
-
- return p;
+ return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
- return ERR_PTR(-EIO);
+ return -EAGAIN;
}
/*
@@ -6297,8 +6215,8 @@ nfs4_stat_to_errno(int stat)
#define PROC(proc, argtype, restype) \
[NFSPROC4_CLNT_##proc] = { \
.p_proc = NFSPROC4_COMPOUND, \
- .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
- .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
+ .p_encode = (kxdreproc_t)nfs4_xdr_##argtype, \
+ .p_decode = (kxdrdproc_t)nfs4_xdr_##restype, \
.p_arglen = NFS4_##argtype##_sz, \
.p_replen = NFS4_##restype##_sz, \
.p_statidx = NFSPROC4_CLNT_##proc, \
@@ -6306,50 +6224,50 @@ nfs4_stat_to_errno(int stat)
}
struct rpc_procinfo nfs4_procedures[] = {
- PROC(READ, enc_read, dec_read),
- PROC(WRITE, enc_write, dec_write),
- PROC(COMMIT, enc_commit, dec_commit),
- PROC(OPEN, enc_open, dec_open),
- PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm),
- PROC(OPEN_NOATTR, enc_open_noattr, dec_open_noattr),
- PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade),
- PROC(CLOSE, enc_close, dec_close),
- PROC(SETATTR, enc_setattr, dec_setattr),
- PROC(FSINFO, enc_fsinfo, dec_fsinfo),
- PROC(RENEW, enc_renew, dec_renew),
- PROC(SETCLIENTID, enc_setclientid, dec_setclientid),
- PROC(SETCLIENTID_CONFIRM, enc_setclientid_confirm, dec_setclientid_confirm),
- PROC(LOCK, enc_lock, dec_lock),
- PROC(LOCKT, enc_lockt, dec_lockt),
- PROC(LOCKU, enc_locku, dec_locku),
- PROC(ACCESS, enc_access, dec_access),
- PROC(GETATTR, enc_getattr, dec_getattr),
- PROC(LOOKUP, enc_lookup, dec_lookup),
- PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root),
- PROC(REMOVE, enc_remove, dec_remove),
- PROC(RENAME, enc_rename, dec_rename),
- PROC(LINK, enc_link, dec_link),
- PROC(SYMLINK, enc_symlink, dec_symlink),
- PROC(CREATE, enc_create, dec_create),
- PROC(PATHCONF, enc_pathconf, dec_pathconf),
- PROC(STATFS, enc_statfs, dec_statfs),
- PROC(READLINK, enc_readlink, dec_readlink),
- PROC(READDIR, enc_readdir, dec_readdir),
- PROC(SERVER_CAPS, enc_server_caps, dec_server_caps),
- PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn),
- PROC(GETACL, enc_getacl, dec_getacl),
- PROC(SETACL, enc_setacl, dec_setacl),
- PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations),
- PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
+ PROC(READ, enc_read, dec_read),
+ PROC(WRITE, enc_write, dec_write),
+ PROC(COMMIT, enc_commit, dec_commit),
+ PROC(OPEN, enc_open, dec_open),
+ PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm),
+ PROC(OPEN_NOATTR, enc_open_noattr, dec_open_noattr),
+ PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade),
+ PROC(CLOSE, enc_close, dec_close),
+ PROC(SETATTR, enc_setattr, dec_setattr),
+ PROC(FSINFO, enc_fsinfo, dec_fsinfo),
+ PROC(RENEW, enc_renew, dec_renew),
+ PROC(SETCLIENTID, enc_setclientid, dec_setclientid),
+ PROC(SETCLIENTID_CONFIRM, enc_setclientid_confirm, dec_setclientid_confirm),
+ PROC(LOCK, enc_lock, dec_lock),
+ PROC(LOCKT, enc_lockt, dec_lockt),
+ PROC(LOCKU, enc_locku, dec_locku),
+ PROC(ACCESS, enc_access, dec_access),
+ PROC(GETATTR, enc_getattr, dec_getattr),
+ PROC(LOOKUP, enc_lookup, dec_lookup),
+ PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root),
+ PROC(REMOVE, enc_remove, dec_remove),
+ PROC(RENAME, enc_rename, dec_rename),
+ PROC(LINK, enc_link, dec_link),
+ PROC(SYMLINK, enc_symlink, dec_symlink),
+ PROC(CREATE, enc_create, dec_create),
+ PROC(PATHCONF, enc_pathconf, dec_pathconf),
+ PROC(STATFS, enc_statfs, dec_statfs),
+ PROC(READLINK, enc_readlink, dec_readlink),
+ PROC(READDIR, enc_readdir, dec_readdir),
+ PROC(SERVER_CAPS, enc_server_caps, dec_server_caps),
+ PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn),
+ PROC(GETACL, enc_getacl, dec_getacl),
+ PROC(SETACL, enc_setacl, dec_setacl),
+ PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations),
+ PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
#if defined(CONFIG_NFS_V4_1)
- PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
- PROC(CREATE_SESSION, enc_create_session, dec_create_session),
- PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session),
- PROC(SEQUENCE, enc_sequence, dec_sequence),
- PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
- PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
- PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
- PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
+ PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
+ PROC(CREATE_SESSION, enc_create_session, dec_create_session),
+ PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session),
+ PROC(SEQUENCE, enc_sequence, dec_sequence),
+ PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
+ PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
+ PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
+ PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
#endif /* CONFIG_NFS_V4_1 */
};
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 9194902..e1164e3 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -26,12 +26,9 @@ static struct kmem_cache *nfs_page_cachep;
static inline struct nfs_page *
nfs_page_alloc(void)
{
- struct nfs_page *p;
- p = kmem_cache_alloc(nfs_page_cachep, GFP_KERNEL);
- if (p) {
- memset(p, 0, sizeof(*p));
+ struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL);
+ if (p)
INIT_LIST_HEAD(&p->wb_list);
- }
return p;
}
@@ -65,6 +62,13 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
if (req == NULL)
return ERR_PTR(-ENOMEM);
+ /* get lock context early so we can deal with alloc failures */
+ req->wb_lock_context = nfs_get_lock_context(ctx);
+ if (req->wb_lock_context == NULL) {
+ nfs_page_free(req);
+ return ERR_PTR(-ENOMEM);
+ }
+
/* Initialize the request struct. Initially, we assume a
* long write-back delay. This will be adjusted in
* update_nfs_request below if the region is not locked. */
@@ -79,7 +83,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
req->wb_pgbase = offset;
req->wb_bytes = count;
req->wb_context = get_nfs_open_context(ctx);
- req->wb_lock_context = nfs_get_lock_context(ctx);
kref_init(&req->wb_kref);
return req;
}
@@ -109,7 +112,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
{
if (!nfs_lock_request_dontget(req))
return 0;
- if (req->wb_page != NULL)
+ if (test_bit(PG_MAPPED, &req->wb_flags))
radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
return 1;
}
@@ -119,7 +122,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
*/
void nfs_clear_page_tag_locked(struct nfs_page *req)
{
- if (req->wb_page != NULL) {
+ if (test_bit(PG_MAPPED, &req->wb_flags)) {
struct inode *inode = req->wb_context->path.dentry->d_inode;
struct nfs_inode *nfsi = NFS_I(inode);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index db77342..bc40897 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -177,105 +177,149 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
* pNFS client layout cache
*/
+/* Need to hold i_lock if caller does not already hold reference */
+void
+get_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+ atomic_inc(&lo->plh_refcount);
+}
+
static void
-get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
+destroy_layout_hdr(struct pnfs_layout_hdr *lo)
{
- assert_spin_locked(&lo->inode->i_lock);
- lo->refcount++;
+ dprintk("%s: freeing layout cache %p\n", __func__, lo);
+ BUG_ON(!list_empty(&lo->plh_layouts));
+ NFS_I(lo->plh_inode)->layout = NULL;
+ kfree(lo);
}
static void
put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
{
- assert_spin_locked(&lo->inode->i_lock);
- BUG_ON(lo->refcount == 0);
-
- lo->refcount--;
- if (!lo->refcount) {
- dprintk("%s: freeing layout cache %p\n", __func__, lo);
- BUG_ON(!list_empty(&lo->layouts));
- NFS_I(lo->inode)->layout = NULL;
- kfree(lo);
- }
+ if (atomic_dec_and_test(&lo->plh_refcount))
+ destroy_layout_hdr(lo);
}
void
-put_layout_hdr(struct inode *inode)
+put_layout_hdr(struct pnfs_layout_hdr *lo)
{
- spin_lock(&inode->i_lock);
- put_layout_hdr_locked(NFS_I(inode)->layout);
- spin_unlock(&inode->i_lock);
+ struct inode *inode = lo->plh_inode;
+
+ if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
+ destroy_layout_hdr(lo);
+ spin_unlock(&inode->i_lock);
+ }
}
static void
init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
{
- INIT_LIST_HEAD(&lseg->fi_list);
- kref_init(&lseg->kref);
- lseg->layout = lo;
+ INIT_LIST_HEAD(&lseg->pls_list);
+ atomic_set(&lseg->pls_refcount, 1);
+ smp_mb();
+ set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
+ lseg->pls_layout = lo;
}
-/* Called without i_lock held, as the free_lseg call may sleep */
-static void
-destroy_lseg(struct kref *kref)
+static void free_lseg(struct pnfs_layout_segment *lseg)
{
- struct pnfs_layout_segment *lseg =
- container_of(kref, struct pnfs_layout_segment, kref);
- struct inode *ino = lseg->layout->inode;
+ struct inode *ino = lseg->pls_layout->plh_inode;
- dprintk("--> %s\n", __func__);
NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
- /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
- put_layout_hdr(ino);
+ /* Matched by get_layout_hdr in pnfs_insert_layout */
+ put_layout_hdr(NFS_I(ino)->layout);
}
-static void
-put_lseg(struct pnfs_layout_segment *lseg)
+/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
+ * could sleep, so must be called outside of the lock.
+ * Returns 1 if object was removed, otherwise return 0.
+ */
+static int
+put_lseg_locked(struct pnfs_layout_segment *lseg,
+ struct list_head *tmp_list)
+{
+ dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
+ atomic_read(&lseg->pls_refcount),
+ test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
+ if (atomic_dec_and_test(&lseg->pls_refcount)) {
+ struct inode *ino = lseg->pls_layout->plh_inode;
+
+ BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
+ list_del(&lseg->pls_list);
+ if (list_empty(&lseg->pls_layout->plh_segs)) {
+ struct nfs_client *clp;
+
+ clp = NFS_SERVER(ino)->nfs_client;
+ spin_lock(&clp->cl_lock);
+ /* List does not take a reference, so no need for put here */
+ list_del_init(&lseg->pls_layout->plh_layouts);
+ spin_unlock(&clp->cl_lock);
+ clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
+ }
+ rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
+ list_add(&lseg->pls_list, tmp_list);
+ return 1;
+ }
+ return 0;
+}
+
+static bool
+should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
{
- if (!lseg)
- return;
+ return (recall_iomode == IOMODE_ANY ||
+ lseg_iomode == recall_iomode);
+}
- dprintk("%s: lseg %p ref %d\n", __func__, lseg,
- atomic_read(&lseg->kref.refcount));
- kref_put(&lseg->kref, destroy_lseg);
+/* Returns 1 if lseg is removed from list, 0 otherwise */
+static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
+ struct list_head *tmp_list)
+{
+ int rv = 0;
+
+ if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
+ /* Remove the reference keeping the lseg in the
+ * list. It will now be removed when all
+ * outstanding io is finished.
+ */
+ rv = put_lseg_locked(lseg, tmp_list);
+ }
+ return rv;
}
-static void
-pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list)
+/* Returns count of number of matching invalid lsegs remaining in list
+ * after call.
+ */
+int
+mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
+ struct list_head *tmp_list,
+ u32 iomode)
{
struct pnfs_layout_segment *lseg, *next;
- struct nfs_client *clp;
+ int invalid = 0, removed = 0;
dprintk("%s:Begin lo %p\n", __func__, lo);
- assert_spin_locked(&lo->inode->i_lock);
- list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
- dprintk("%s: freeing lseg %p\n", __func__, lseg);
- list_move(&lseg->fi_list, tmp_list);
- }
- clp = NFS_SERVER(lo->inode)->nfs_client;
- spin_lock(&clp->cl_lock);
- /* List does not take a reference, so no need for put here */
- list_del_init(&lo->layouts);
- spin_unlock(&clp->cl_lock);
- write_seqlock(&lo->seqlock);
- clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
- write_sequnlock(&lo->seqlock);
-
- dprintk("%s:Return\n", __func__);
+ list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
+ if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
+ dprintk("%s: freeing lseg %p iomode %d "
+ "offset %llu length %llu\n", __func__,
+ lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
+ lseg->pls_range.length);
+ invalid++;
+ removed += mark_lseg_invalid(lseg, tmp_list);
+ }
+ dprintk("%s:Return %i\n", __func__, invalid - removed);
+ return invalid - removed;
}
-static void
-pnfs_free_lseg_list(struct list_head *tmp_list)
+void
+pnfs_free_lseg_list(struct list_head *free_me)
{
- struct pnfs_layout_segment *lseg;
+ struct pnfs_layout_segment *lseg, *tmp;
- while (!list_empty(tmp_list)) {
- lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
- fi_list);
- dprintk("%s calling put_lseg on %p\n", __func__, lseg);
- list_del(&lseg->fi_list);
- put_lseg(lseg);
+ list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
+ list_del(&lseg->pls_list);
+ free_lseg(lseg);
}
}
@@ -288,7 +332,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
spin_lock(&nfsi->vfs_inode.i_lock);
lo = nfsi->layout;
if (lo) {
- pnfs_clear_lseg_list(lo, &tmp_list);
+ set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags);
+ mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
/* Matched by refcount set to 1 in alloc_init_layout_hdr */
put_layout_hdr_locked(lo);
}
@@ -312,76 +357,80 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
while (!list_empty(&tmp_list)) {
lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
- layouts);
+ plh_layouts);
dprintk("%s freeing layout for inode %lu\n", __func__,
- lo->inode->i_ino);
- pnfs_destroy_layout(NFS_I(lo->inode));
+ lo->plh_inode->i_ino);
+ pnfs_destroy_layout(NFS_I(lo->plh_inode));
}
}
-/* update lo->stateid with new if is more recent
- *
- * lo->stateid could be the open stateid, in which case we just use what given.
- */
-static void
-pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
- const nfs4_stateid *new)
-{
- nfs4_stateid *old = &lo->stateid;
- bool overwrite = false;
-
- write_seqlock(&lo->seqlock);
- if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
- memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
- overwrite = true;
- else {
- u32 oldseq, newseq;
-
- oldseq = be32_to_cpu(old->stateid.seqid);
- newseq = be32_to_cpu(new->stateid.seqid);
- if ((int)(newseq - oldseq) > 0)
- overwrite = true;
+/* update lo->plh_stateid with new if is more recent */
+void
+pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
+ bool update_barrier)
+{
+ u32 oldseq, newseq;
+
+ oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
+ newseq = be32_to_cpu(new->stateid.seqid);
+ if ((int)(newseq - oldseq) > 0) {
+ memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
+ if (update_barrier) {
+ u32 new_barrier = be32_to_cpu(new->stateid.seqid);
+
+ if ((int)(new_barrier - lo->plh_barrier))
+ lo->plh_barrier = new_barrier;
+ } else {
+ /* Because of wraparound, we want to keep the barrier
+ * "close" to the current seqids. It needs to be
+ * within 2**31 to count as "behind", so if it
+ * gets too near that limit, give us a litle leeway
+ * and bring it to within 2**30.
+ * NOTE - and yes, this is all unsigned arithmetic.
+ */
+ if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
+ lo->plh_barrier = newseq - (1 << 30);
+ }
}
- if (overwrite)
- memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
- write_sequnlock(&lo->seqlock);
}
-static void
-pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
- struct nfs4_state *state)
+/* lget is set to 1 if called from inside send_layoutget call chain */
+static bool
+pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
+ int lget)
{
- int seq;
-
- dprintk("--> %s\n", __func__);
- write_seqlock(&lo->seqlock);
- do {
- seq = read_seqbegin(&state->seqlock);
- memcpy(lo->stateid.data, state->stateid.data,
- sizeof(state->stateid.data));
- } while (read_seqretry(&state->seqlock, seq));
- set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
- write_sequnlock(&lo->seqlock);
- dprintk("<-- %s\n", __func__);
+ if ((stateid) &&
+ (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
+ return true;
+ return lo->plh_block_lgets ||
+ test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
+ (list_empty(&lo->plh_segs) &&
+ (atomic_read(&lo->plh_outstanding) > lget));
}
-void
-pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
- struct nfs4_state *open_state)
+int
+pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
+ struct nfs4_state *open_state)
{
- int seq;
+ int status = 0;
dprintk("--> %s\n", __func__);
- do {
- seq = read_seqbegin(&lo->seqlock);
- if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
- /* This will trigger retry of the read */
- pnfs_layout_from_open_stateid(lo, open_state);
- } else
- memcpy(dst->data, lo->stateid.data,
- sizeof(lo->stateid.data));
- } while (read_seqretry(&lo->seqlock, seq));
+ spin_lock(&lo->plh_inode->i_lock);
+ if (pnfs_layoutgets_blocked(lo, NULL, 1)) {
+ status = -EAGAIN;
+ } else if (list_empty(&lo->plh_segs)) {
+ int seq;
+
+ do {
+ seq = read_seqbegin(&open_state->seqlock);
+ memcpy(dst->data, open_state->stateid.data,
+ sizeof(open_state->stateid.data));
+ } while (read_seqretry(&open_state->seqlock, seq));
+ } else
+ memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data));
+ spin_unlock(&lo->plh_inode->i_lock);
dprintk("<-- %s\n", __func__);
+ return status;
}
/*
@@ -395,7 +444,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
struct nfs_open_context *ctx,
u32 iomode)
{
- struct inode *ino = lo->inode;
+ struct inode *ino = lo->plh_inode;
struct nfs_server *server = NFS_SERVER(ino);
struct nfs4_layoutget *lgp;
struct pnfs_layout_segment *lseg = NULL;
@@ -404,10 +453,8 @@ send_layoutget(struct pnfs_layout_hdr *lo,
BUG_ON(ctx == NULL);
lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
- if (lgp == NULL) {
- put_layout_hdr(lo->inode);
+ if (lgp == NULL)
return NULL;
- }
lgp->args.minlength = NFS4_MAX_UINT64;
lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
lgp->args.range.iomode = iomode;
@@ -424,11 +471,88 @@ send_layoutget(struct pnfs_layout_hdr *lo,
nfs4_proc_layoutget(lgp);
if (!lseg) {
/* remember that LAYOUTGET failed and suspend trying */
- set_bit(lo_fail_bit(iomode), &lo->state);
+ set_bit(lo_fail_bit(iomode), &lo->plh_flags);
}
return lseg;
}
+bool pnfs_roc(struct inode *ino)
+{
+ struct pnfs_layout_hdr *lo;
+ struct pnfs_layout_segment *lseg, *tmp;
+ LIST_HEAD(tmp_list);
+ bool found = false;
+
+ spin_lock(&ino->i_lock);
+ lo = NFS_I(ino)->layout;
+ if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) ||
+ test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
+ goto out_nolayout;
+ list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
+ if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
+ mark_lseg_invalid(lseg, &tmp_list);
+ found = true;
+ }
+ if (!found)
+ goto out_nolayout;
+ lo->plh_block_lgets++;
+ get_layout_hdr(lo); /* matched in pnfs_roc_release */
+ spin_unlock(&ino->i_lock);
+ pnfs_free_lseg_list(&tmp_list);
+ return true;
+
+out_nolayout:
+ spin_unlock(&ino->i_lock);
+ return false;
+}
+
+void pnfs_roc_release(struct inode *ino)
+{
+ struct pnfs_layout_hdr *lo;
+
+ spin_lock(&ino->i_lock);
+ lo = NFS_I(ino)->layout;
+ lo->plh_block_lgets--;
+ put_layout_hdr_locked(lo);
+ spin_unlock(&ino->i_lock);
+}
+
+void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
+{
+ struct pnfs_layout_hdr *lo;
+
+ spin_lock(&ino->i_lock);
+ lo = NFS_I(ino)->layout;
+ if ((int)(barrier - lo->plh_barrier) > 0)
+ lo->plh_barrier = barrier;
+ spin_unlock(&ino->i_lock);
+}
+
+bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
+{
+ struct nfs_inode *nfsi = NFS_I(ino);
+ struct pnfs_layout_segment *lseg;
+ bool found = false;
+
+ spin_lock(&ino->i_lock);
+ list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
+ if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
+ found = true;
+ break;
+ }
+ if (!found) {
+ struct pnfs_layout_hdr *lo = nfsi->layout;
+ u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid);
+
+ /* Since close does not return a layout stateid for use as
+ * a barrier, we choose the worst-case barrier.
+ */
+ *barrier = current_seqid + atomic_read(&lo->plh_outstanding);
+ }
+ spin_unlock(&ino->i_lock);
+ return found;
+}
+
/*
* Compare two layout segments for sorting into layout cache.
* We want to preferentially return RW over RO layouts, so ensure those
@@ -450,37 +574,29 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
dprintk("%s:Begin\n", __func__);
- assert_spin_locked(&lo->inode->i_lock);
- if (list_empty(&lo->segs)) {
- struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client;
-
- spin_lock(&clp->cl_lock);
- BUG_ON(!list_empty(&lo->layouts));
- list_add_tail(&lo->layouts, &clp->cl_layouts);
- spin_unlock(&clp->cl_lock);
- }
- list_for_each_entry(lp, &lo->segs, fi_list) {
- if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
+ assert_spin_locked(&lo->plh_inode->i_lock);
+ list_for_each_entry(lp, &lo->plh_segs, pls_list) {
+ if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0)
continue;
- list_add_tail(&lseg->fi_list, &lp->fi_list);
+ list_add_tail(&lseg->pls_list, &lp->pls_list);
dprintk("%s: inserted lseg %p "
"iomode %d offset %llu length %llu before "
"lp %p iomode %d offset %llu length %llu\n",
- __func__, lseg, lseg->range.iomode,
- lseg->range.offset, lseg->range.length,
- lp, lp->range.iomode, lp->range.offset,
- lp->range.length);
+ __func__, lseg, lseg->pls_range.iomode,
+ lseg->pls_range.offset, lseg->pls_range.length,
+ lp, lp->pls_range.iomode, lp->pls_range.offset,
+ lp->pls_range.length);
found = 1;
break;
}
if (!found) {
- list_add_tail(&lseg->fi_list, &lo->segs);
+ list_add_tail(&lseg->pls_list, &lo->plh_segs);
dprintk("%s: inserted lseg %p "
"iomode %d offset %llu length %llu at tail\n",
- __func__, lseg, lseg->range.iomode,
- lseg->range.offset, lseg->range.length);
+ __func__, lseg, lseg->pls_range.iomode,
+ lseg->pls_range.offset, lseg->pls_range.length);
}
- get_layout_hdr_locked(lo);
+ get_layout_hdr(lo);
dprintk("%s:Return\n", __func__);
}
@@ -493,11 +609,11 @@ alloc_init_layout_hdr(struct inode *ino)
lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
if (!lo)
return NULL;
- lo->refcount = 1;
- INIT_LIST_HEAD(&lo->layouts);
- INIT_LIST_HEAD(&lo->segs);
- seqlock_init(&lo->seqlock);
- lo->inode = ino;
+ atomic_set(&lo->plh_refcount, 1);
+ INIT_LIST_HEAD(&lo->plh_layouts);
+ INIT_LIST_HEAD(&lo->plh_segs);
+ INIT_LIST_HEAD(&lo->plh_bulk_recall);
+ lo->plh_inode = ino;
return lo;
}
@@ -510,9 +626,12 @@ pnfs_find_alloc_layout(struct inode *ino)
dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
assert_spin_locked(&ino->i_lock);
- if (nfsi->layout)
- return nfsi->layout;
-
+ if (nfsi->layout) {
+ if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags))
+ return NULL;
+ else
+ return nfsi->layout;
+ }
spin_unlock(&ino->i_lock);
new = alloc_init_layout_hdr(ino);
spin_lock(&ino->i_lock);
@@ -538,31 +657,32 @@ pnfs_find_alloc_layout(struct inode *ino)
static int
is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
{
- return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW);
+ return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW);
}
/*
* lookup range in layout
*/
static struct pnfs_layout_segment *
-pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
+pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
{
struct pnfs_layout_segment *lseg, *ret = NULL;
dprintk("%s:Begin\n", __func__);
- assert_spin_locked(&lo->inode->i_lock);
- list_for_each_entry(lseg, &lo->segs, fi_list) {
- if (is_matching_lseg(lseg, iomode)) {
+ assert_spin_locked(&lo->plh_inode->i_lock);
+ list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
+ if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
+ is_matching_lseg(lseg, iomode)) {
ret = lseg;
break;
}
- if (cmp_layout(iomode, lseg->range.iomode) > 0)
+ if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
break;
}
dprintk("%s:Return lseg %p ref %d\n",
- __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0);
+ __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
return ret;
}
@@ -576,6 +696,7 @@ pnfs_update_layout(struct inode *ino,
enum pnfs_iomode iomode)
{
struct nfs_inode *nfsi = NFS_I(ino);
+ struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
struct pnfs_layout_hdr *lo;
struct pnfs_layout_segment *lseg = NULL;
@@ -588,25 +709,53 @@ pnfs_update_layout(struct inode *ino,
goto out_unlock;
}
- /* Check to see if the layout for the given range already exists */
- lseg = pnfs_has_layout(lo, iomode);
- if (lseg) {
- dprintk("%s: Using cached lseg %p for iomode %d)\n",
- __func__, lseg, iomode);
+ /* Do we even need to bother with this? */
+ if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+ test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+ dprintk("%s matches recall, use MDS\n", __func__);
goto out_unlock;
}
+ /* Check to see if the layout for the given range already exists */
+ lseg = pnfs_find_lseg(lo, iomode);
+ if (lseg)
+ goto out_unlock;
/* if LAYOUTGET already failed once we don't try again */
- if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state))
+ if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
+ goto out_unlock;
+
+ if (pnfs_layoutgets_blocked(lo, NULL, 0))
goto out_unlock;
+ atomic_inc(&lo->plh_outstanding);
- get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */
+ get_layout_hdr(lo);
+ if (list_empty(&lo->plh_segs)) {
+ /* The lo must be on the clp list if there is any
+ * chance of a CB_LAYOUTRECALL(FILE) coming in.
+ */
+ spin_lock(&clp->cl_lock);
+ BUG_ON(!list_empty(&lo->plh_layouts));
+ list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
+ spin_unlock(&clp->cl_lock);
+ }
spin_unlock(&ino->i_lock);
lseg = send_layoutget(lo, ctx, iomode);
+ if (!lseg) {
+ spin_lock(&ino->i_lock);
+ if (list_empty(&lo->plh_segs)) {
+ spin_lock(&clp->cl_lock);
+ list_del_init(&lo->plh_layouts);
+ spin_unlock(&clp->cl_lock);
+ clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+ }
+ spin_unlock(&ino->i_lock);
+ }
+ atomic_dec(&lo->plh_outstanding);
+ put_layout_hdr(lo);
out:
dprintk("%s end, state 0x%lx lseg %p\n", __func__,
- nfsi->layout->state, lseg);
+ nfsi->layout->plh_flags, lseg);
return lseg;
out_unlock:
spin_unlock(&ino->i_lock);
@@ -619,9 +768,21 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
struct nfs4_layoutget_res *res = &lgp->res;
struct pnfs_layout_segment *lseg;
- struct inode *ino = lo->inode;
+ struct inode *ino = lo->plh_inode;
+ struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
int status = 0;
+ /* Verify we got what we asked for.
+ * Note that because the xdr parsing only accepts a single
+ * element array, this can fail even if the server is behaving
+ * correctly.
+ */
+ if (lgp->args.range.iomode > res->range.iomode ||
+ res->range.offset != 0 ||
+ res->range.length != NFS4_MAX_UINT64) {
+ status = -EINVAL;
+ goto out;
+ }
/* Inject layout blob into I/O device driver */
lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
if (!lseg || IS_ERR(lseg)) {
@@ -635,16 +796,37 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
}
spin_lock(&ino->i_lock);
+ if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+ test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+ dprintk("%s forget reply due to recall\n", __func__);
+ goto out_forget_reply;
+ }
+
+ if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
+ dprintk("%s forget reply due to state\n", __func__);
+ goto out_forget_reply;
+ }
init_lseg(lo, lseg);
- lseg->range = res->range;
+ lseg->pls_range = res->range;
*lgp->lsegpp = lseg;
pnfs_insert_layout(lo, lseg);
+ if (res->return_on_close) {
+ set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
+ set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
+ }
+
/* Done processing layoutget. Set the layout stateid */
- pnfs_set_layout_stateid(lo, &res->stateid);
+ pnfs_set_layout_stateid(lo, &res->stateid, false);
spin_unlock(&ino->i_lock);
out:
return status;
+
+out_forget_reply:
+ spin_unlock(&ino->i_lock);
+ lseg->pls_layout = lo;
+ NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+ goto out;
}
/*
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index e12367d..e2612ea 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -30,11 +30,17 @@
#ifndef FS_NFS_PNFS_H
#define FS_NFS_PNFS_H
+enum {
+ NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
+ NFS_LSEG_ROC, /* roc bit received from server */
+};
+
struct pnfs_layout_segment {
- struct list_head fi_list;
- struct pnfs_layout_range range;
- struct kref kref;
- struct pnfs_layout_hdr *layout;
+ struct list_head pls_list;
+ struct pnfs_layout_range pls_range;
+ atomic_t pls_refcount;
+ unsigned long pls_flags;
+ struct pnfs_layout_hdr *pls_layout;
};
#ifdef CONFIG_NFS_V4_1
@@ -44,7 +50,9 @@ struct pnfs_layout_segment {
enum {
NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
- NFS_LAYOUT_STATEID_SET, /* have a valid layout stateid */
+ NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
+ NFS_LAYOUT_ROC, /* some lseg had roc bit set */
+ NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */
};
/* Per-layout driver specific registration structure */
@@ -60,13 +68,16 @@ struct pnfs_layoutdriver_type {
};
struct pnfs_layout_hdr {
- unsigned long refcount;
- struct list_head layouts; /* other client layouts */
- struct list_head segs; /* layout segments list */
- seqlock_t seqlock; /* Protects the stateid */
- nfs4_stateid stateid;
- unsigned long state;
- struct inode *inode;
+ atomic_t plh_refcount;
+ struct list_head plh_layouts; /* other client layouts */
+ struct list_head plh_bulk_recall; /* clnt list of bulk recalls */
+ struct list_head plh_segs; /* layout segments list */
+ nfs4_stateid plh_stateid;
+ atomic_t plh_outstanding; /* number of RPCs out */
+ unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */
+ u32 plh_barrier; /* ignore lower seqids */
+ unsigned long plh_flags;
+ struct inode *plh_inode;
};
struct pnfs_device {
@@ -134,17 +145,30 @@ extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
/* pnfs.c */
+void get_layout_hdr(struct pnfs_layout_hdr *lo);
struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
enum pnfs_iomode access_type);
void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
void unset_pnfs_layoutdriver(struct nfs_server *);
int pnfs_layout_process(struct nfs4_layoutget *lgp);
+void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *);
void pnfs_destroy_all_layouts(struct nfs_client *);
-void put_layout_hdr(struct inode *inode);
-void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
- struct nfs4_state *open_state);
+void put_layout_hdr(struct pnfs_layout_hdr *lo);
+void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *new,
+ bool update_barrier);
+int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
+ struct pnfs_layout_hdr *lo,
+ struct nfs4_state *open_state);
+int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
+ struct list_head *tmp_list,
+ u32 iomode);
+bool pnfs_roc(struct inode *ino);
+void pnfs_roc_release(struct inode *ino);
+void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
+bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
static inline int lo_fail_bit(u32 iomode)
@@ -176,6 +200,28 @@ pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
return NULL;
}
+static inline bool
+pnfs_roc(struct inode *ino)
+{
+ return false;
+}
+
+static inline void
+pnfs_roc_release(struct inode *ino)
+{
+}
+
+static inline void
+pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
+{
+}
+
+static inline bool
+pnfs_roc_drain(struct inode *ino, u32 *barrier)
+{
+ return false;
+}
+
static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id)
{
}
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 58e7f84..77d5e21 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -458,7 +458,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
fattr = nfs_alloc_fattr();
status = -ENOMEM;
if (fh == NULL || fattr == NULL)
- goto out;
+ goto out_free;
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
@@ -471,6 +471,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
if (status == 0)
status = nfs_instantiate(dentry, fh, fattr);
+out_free:
nfs_free_fattr(fattr);
nfs_free_fhandle(fh);
out:
@@ -731,7 +732,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.statfs = nfs_proc_statfs,
.fsinfo = nfs_proc_fsinfo,
.pathconf = nfs_proc_pathconf,
- .decode_dirent = nfs_decode_dirent,
+ .decode_dirent = nfs2_decode_dirent,
.read_setup = nfs_proc_read_setup,
.read_done = nfs_read_done,
.write_setup = nfs_proc_write_setup,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index e4b62c6..aedcaa7 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -152,7 +152,6 @@ static void nfs_readpage_release(struct nfs_page *req)
(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
- nfs_clear_request(req);
nfs_release_request(req);
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3600ec7..0f9ea73 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -39,7 +39,6 @@
#include <linux/nfs_mount.h>
#include <linux/nfs4_mount.h>
#include <linux/lockd/bind.h>
-#include <linux/smp_lock.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
#include <linux/mnt_namespace.h>
@@ -67,6 +66,12 @@
#define NFSDBG_FACILITY NFSDBG_VFS
+#ifdef CONFIG_NFS_V3
+#define NFS_DEFAULT_VERSION 3
+#else
+#define NFS_DEFAULT_VERSION 2
+#endif
+
enum {
/* Mount options that take no arguments */
Opt_soft, Opt_hard,
@@ -260,8 +265,8 @@ static int nfs_statfs(struct dentry *, struct kstatfs *);
static int nfs_show_options(struct seq_file *, struct vfsmount *);
static int nfs_show_stats(struct seq_file *, struct vfsmount *);
static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
-static int nfs_xdev_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data);
static void nfs_put_super(struct super_block *);
static void nfs_kill_super(struct super_block *);
static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
@@ -277,7 +282,7 @@ static struct file_system_type nfs_fs_type = {
struct file_system_type nfs_xdev_fs_type = {
.owner = THIS_MODULE,
.name = "nfs",
- .get_sb = nfs_xdev_get_sb,
+ .mount = nfs_xdev_mount,
.kill_sb = nfs_kill_super,
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
@@ -302,14 +307,14 @@ static int nfs4_try_mount(int flags, const char *dev_name,
struct nfs_parsed_mount_data *data, struct vfsmount *mnt);
static int nfs4_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs4_remote_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs4_xdev_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data);
+static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data);
static int nfs4_referral_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data);
static void nfs4_kill_super(struct super_block *sb);
static struct file_system_type nfs4_fs_type = {
@@ -323,7 +328,7 @@ static struct file_system_type nfs4_fs_type = {
static struct file_system_type nfs4_remote_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
- .get_sb = nfs4_remote_get_sb,
+ .mount = nfs4_remote_mount,
.kill_sb = nfs4_kill_super,
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
@@ -331,7 +336,7 @@ static struct file_system_type nfs4_remote_fs_type = {
struct file_system_type nfs4_xdev_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
- .get_sb = nfs4_xdev_get_sb,
+ .mount = nfs4_xdev_mount,
.kill_sb = nfs4_kill_super,
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
@@ -339,7 +344,7 @@ struct file_system_type nfs4_xdev_fs_type = {
static struct file_system_type nfs4_remote_referral_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
- .get_sb = nfs4_remote_referral_get_sb,
+ .mount = nfs4_remote_referral_mount,
.kill_sb = nfs4_kill_super,
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
@@ -593,7 +598,9 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
if (nfss->mountd_version || showdefaults)
seq_printf(m, ",mountvers=%u", nfss->mountd_version);
- if (nfss->mountd_port || showdefaults)
+ if ((nfss->mountd_port &&
+ nfss->mountd_port != (unsigned short)NFS_UNSPEC_PORT) ||
+ showdefaults)
seq_printf(m, ",mountport=%u", nfss->mountd_port);
nfs_show_mountd_netid(m, nfss, showdefaults);
@@ -1064,12 +1071,10 @@ static int nfs_parse_mount_options(char *raw,
mnt->flags |= NFS_MOUNT_VER3;
mnt->version = 3;
break;
-#ifdef CONFIG_NFS_V4
case Opt_v4:
mnt->flags &= ~NFS_MOUNT_VER3;
mnt->version = 4;
break;
-#endif
case Opt_udp:
mnt->flags &= ~NFS_MOUNT_TCP;
mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
@@ -1281,12 +1286,10 @@ static int nfs_parse_mount_options(char *raw,
mnt->flags |= NFS_MOUNT_VER3;
mnt->version = 3;
break;
-#ifdef CONFIG_NFS_V4
case NFS4_VERSION:
mnt->flags &= ~NFS_MOUNT_VER3;
mnt->version = 4;
break;
-#endif
default:
goto out_invalid_value;
}
@@ -2277,7 +2280,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
};
int error = -ENOMEM;
- data = nfs_alloc_parsed_mount_data(3);
+ data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
mntfh = nfs_alloc_fhandle();
if (data == NULL || mntfh == NULL)
goto out_free_fh;
@@ -2397,9 +2400,9 @@ static void nfs_kill_super(struct super_block *s)
/*
* Clone an NFS2/3 server record on xdev traversal (FSID-change)
*/
-static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *raw_data,
- struct vfsmount *mnt)
+static struct dentry *
+nfs_xdev_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *raw_data)
{
struct nfs_clone_mount *data = raw_data;
struct super_block *s;
@@ -2411,7 +2414,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
};
int error;
- dprintk("--> nfs_xdev_get_sb()\n");
+ dprintk("--> nfs_xdev_mount()\n");
/* create a new volume representation */
server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
@@ -2458,28 +2461,26 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
}
s->s_flags |= MS_ACTIVE;
- mnt->mnt_sb = s;
- mnt->mnt_root = mntroot;
/* clone any lsm security options from the parent to the new sb */
security_sb_clone_mnt_opts(data->sb, s);
- dprintk("<-- nfs_xdev_get_sb() = 0\n");
- return 0;
+ dprintk("<-- nfs_xdev_mount() = 0\n");
+ return mntroot;
out_err_nosb:
nfs_free_server(server);
out_err_noserver:
- dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error);
- return error;
+ dprintk("<-- nfs_xdev_mount() = %d [error]\n", error);
+ return ERR_PTR(error);
error_splat_super:
if (server && !s->s_root)
bdi_unregister(&server->backing_dev_info);
error_splat_bdi:
deactivate_locked_super(s);
- dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error);
- return error;
+ dprintk("<-- nfs_xdev_mount() = %d [splat]\n", error);
+ return ERR_PTR(error);
}
#ifdef CONFIG_NFS_V4
@@ -2495,7 +2496,13 @@ static void nfs4_clone_super(struct super_block *sb,
sb->s_maxbytes = old_sb->s_maxbytes;
sb->s_time_gran = 1;
sb->s_op = old_sb->s_op;
- nfs_initialise_sb(sb);
+ /*
+ * The VFS shouldn't apply the umask to mode bits. We will do
+ * so ourselves when necessary.
+ */
+ sb->s_flags |= MS_POSIXACL;
+ sb->s_xattr = old_sb->s_xattr;
+ nfs_initialise_sb(sb);
}
/*
@@ -2505,6 +2512,12 @@ static void nfs4_fill_super(struct super_block *sb)
{
sb->s_time_gran = 1;
sb->s_op = &nfs4_sops;
+ /*
+ * The VFS shouldn't apply the umask to mode bits. We will do
+ * so ourselves when necessary.
+ */
+ sb->s_flags |= MS_POSIXACL;
+ sb->s_xattr = nfs4_xattr_handlers;
nfs_initialise_sb(sb);
}
@@ -2649,8 +2662,9 @@ out_no_address:
/*
* Get the superblock for the NFS4 root partition
*/
-static int nfs4_remote_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+static struct dentry *
+nfs4_remote_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *raw_data)
{
struct nfs_parsed_mount_data *data = raw_data;
struct super_block *s;
@@ -2714,15 +2728,16 @@ static int nfs4_remote_get_sb(struct file_system_type *fs_type,
goto error_splat_root;
s->s_flags |= MS_ACTIVE;
- mnt->mnt_sb = s;
- mnt->mnt_root = mntroot;
- error = 0;
+
+ security_free_mnt_opts(&data->lsm_opts);
+ nfs_free_fhandle(mntfh);
+ return mntroot;
out:
security_free_mnt_opts(&data->lsm_opts);
out_free_fh:
nfs_free_fhandle(mntfh);
- return error;
+ return ERR_PTR(error);
out_free:
nfs_free_server(server);
@@ -2968,9 +2983,9 @@ static void nfs4_kill_super(struct super_block *sb)
/*
* Clone an NFS4 server record on xdev traversal (FSID-change)
*/
-static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *raw_data,
- struct vfsmount *mnt)
+static struct dentry *
+nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *raw_data)
{
struct nfs_clone_mount *data = raw_data;
struct super_block *s;
@@ -2982,7 +2997,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
};
int error;
- dprintk("--> nfs4_xdev_get_sb()\n");
+ dprintk("--> nfs4_xdev_mount()\n");
/* create a new volume representation */
server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
@@ -3029,32 +3044,30 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
}
s->s_flags |= MS_ACTIVE;
- mnt->mnt_sb = s;
- mnt->mnt_root = mntroot;
security_sb_clone_mnt_opts(data->sb, s);
- dprintk("<-- nfs4_xdev_get_sb() = 0\n");
- return 0;
+ dprintk("<-- nfs4_xdev_mount() = 0\n");
+ return mntroot;
out_err_nosb:
nfs_free_server(server);
out_err_noserver:
- dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error);
- return error;
+ dprintk("<-- nfs4_xdev_mount() = %d [error]\n", error);
+ return ERR_PTR(error);
error_splat_super:
if (server && !s->s_root)
bdi_unregister(&server->backing_dev_info);
error_splat_bdi:
deactivate_locked_super(s);
- dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error);
- return error;
+ dprintk("<-- nfs4_xdev_mount() = %d [splat]\n", error);
+ return ERR_PTR(error);
}
-static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data,
- struct vfsmount *mnt)
+static struct dentry *
+nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *raw_data)
{
struct nfs_clone_mount *data = raw_data;
struct super_block *s;
@@ -3118,14 +3131,12 @@ static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
}
s->s_flags |= MS_ACTIVE;
- mnt->mnt_sb = s;
- mnt->mnt_root = mntroot;
security_sb_clone_mnt_opts(data->sb, s);
nfs_free_fhandle(mntfh);
dprintk("<-- nfs4_referral_get_sb() = 0\n");
- return 0;
+ return mntroot;
out_err_nosb:
nfs_free_server(server);
@@ -3133,7 +3144,7 @@ out_err_noserver:
nfs_free_fhandle(mntfh);
out_err_nofh:
dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
- return error;
+ return ERR_PTR(error);
error_splat_super:
if (server && !s->s_root)
@@ -3142,7 +3153,7 @@ error_splat_bdi:
deactivate_locked_super(s);
nfs_free_fhandle(mntfh);
dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
- return error;
+ return ERR_PTR(error);
}
/*
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 9a16bad..e313a51 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -429,7 +429,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (data == NULL)
return ERR_PTR(-ENOMEM);
- task_setup_data.callback_data = data,
+ task_setup_data.callback_data = data;
data->cred = rpc_lookup_cred();
if (IS_ERR(data->cred)) {
@@ -444,9 +444,9 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
/* set up nfs_renamedata */
data->old_dir = old_dir;
- atomic_inc(&old_dir->i_count);
+ ihold(old_dir);
data->new_dir = new_dir;
- atomic_inc(&new_dir->i_count);
+ ihold(new_dir);
data->old_dentry = dget(old_dentry);
data->new_dentry = dget(new_dentry);
nfs_fattr_init(&data->old_fattr);
@@ -496,7 +496,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
- atomic_read(&dentry->d_count));
+ dentry->d_count);
nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
/*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 4c14c17..10d648ea 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -390,6 +390,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
if (nfs_have_delegation(inode, FMODE_WRITE))
nfsi->change_attr++;
}
+ set_bit(PG_MAPPED, &req->wb_flags);
SetPagePrivate(req->wb_page);
set_page_private(req->wb_page, (unsigned long)req);
nfsi->npages++;
@@ -415,6 +416,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
spin_lock(&inode->i_lock);
set_page_private(req->wb_page, 0);
ClearPagePrivate(req->wb_page);
+ clear_bit(PG_MAPPED, &req->wb_flags);
radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
nfsi->npages--;
if (!nfsi->npages) {
@@ -422,7 +424,6 @@ static void nfs_inode_remove_request(struct nfs_page *req)
iput(inode);
} else
spin_unlock(&inode->i_lock);
- nfs_clear_request(req);
nfs_release_request(req);
}
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 2a533a0..7e84a85 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -260,9 +260,11 @@ void fill_post_wcc(struct svc_fh *fhp)
err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry,
&fhp->fh_post_attr);
fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version;
- if (err)
+ if (err) {
fhp->fh_post_saved = 0;
- else
+ /* Grab the ctime anyway - set_change_info might use it */
+ fhp->fh_post_attr.ctime = fhp->fh_dentry->d_inode->i_ctime;
+ } else
fhp->fh_post_saved = 1;
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 143da2e..21a63da 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -50,11 +50,6 @@ enum {
NFSPROC4_CLNT_CB_SEQUENCE,
};
-enum nfs_cb_opnum4 {
- OP_CB_RECALL = 4,
- OP_CB_SEQUENCE = 11,
-};
-
#define NFS4_MAXTAGLEN 20
#define NFS4_enc_cb_null_sz 0
@@ -79,61 +74,6 @@ enum nfs_cb_opnum4 {
cb_sequence_dec_sz + \
op_dec_sz)
-/*
-* Generic encode routines from fs/nfs/nfs4xdr.c
-*/
-static inline __be32 *
-xdr_writemem(__be32 *p, const void *ptr, int nbytes)
-{
- int tmp = XDR_QUADLEN(nbytes);
- if (!tmp)
- return p;
- p[tmp-1] = 0;
- memcpy(p, ptr, nbytes);
- return p + tmp;
-}
-
-#define WRITE32(n) *p++ = htonl(n)
-#define WRITEMEM(ptr,nbytes) do { \
- p = xdr_writemem(p, ptr, nbytes); \
-} while (0)
-#define RESERVE_SPACE(nbytes) do { \
- p = xdr_reserve_space(xdr, nbytes); \
- if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __func__); \
- BUG_ON(!p); \
-} while (0)
-
-/*
- * Generic decode routines from fs/nfs/nfs4xdr.c
- */
-#define DECODE_TAIL \
- status = 0; \
-out: \
- return status; \
-xdr_error: \
- dprintk("NFSD: xdr error! (%s:%d)\n", __FILE__, __LINE__); \
- status = -EIO; \
- goto out
-
-#define READ32(x) (x) = ntohl(*p++)
-#define READ64(x) do { \
- (x) = (u64)ntohl(*p++) << 32; \
- (x) |= ntohl(*p++); \
-} while (0)
-#define READTIME(x) do { \
- p++; \
- (x.tv_sec) = ntohl(*p++); \
- (x.tv_nsec) = ntohl(*p++); \
-} while (0)
-#define READ_BUF(nbytes) do { \
- p = xdr_inline_decode(xdr, nbytes); \
- if (!p) { \
- dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \
- __func__, __LINE__); \
- return -EIO; \
- } \
-} while (0)
-
struct nfs4_cb_compound_hdr {
/* args */
u32 ident; /* minorversion 0 only */
@@ -144,295 +84,513 @@ struct nfs4_cb_compound_hdr {
int status;
};
-static struct {
-int stat;
-int errno;
-} nfs_cb_errtbl[] = {
- { NFS4_OK, 0 },
- { NFS4ERR_PERM, EPERM },
- { NFS4ERR_NOENT, ENOENT },
- { NFS4ERR_IO, EIO },
- { NFS4ERR_NXIO, ENXIO },
- { NFS4ERR_ACCESS, EACCES },
- { NFS4ERR_EXIST, EEXIST },
- { NFS4ERR_XDEV, EXDEV },
- { NFS4ERR_NOTDIR, ENOTDIR },
- { NFS4ERR_ISDIR, EISDIR },
- { NFS4ERR_INVAL, EINVAL },
- { NFS4ERR_FBIG, EFBIG },
- { NFS4ERR_NOSPC, ENOSPC },
- { NFS4ERR_ROFS, EROFS },
- { NFS4ERR_MLINK, EMLINK },
- { NFS4ERR_NAMETOOLONG, ENAMETOOLONG },
- { NFS4ERR_NOTEMPTY, ENOTEMPTY },
- { NFS4ERR_DQUOT, EDQUOT },
- { NFS4ERR_STALE, ESTALE },
- { NFS4ERR_BADHANDLE, EBADHANDLE },
- { NFS4ERR_BAD_COOKIE, EBADCOOKIE },
- { NFS4ERR_NOTSUPP, ENOTSUPP },
- { NFS4ERR_TOOSMALL, ETOOSMALL },
- { NFS4ERR_SERVERFAULT, ESERVERFAULT },
- { NFS4ERR_BADTYPE, EBADTYPE },
- { NFS4ERR_LOCKED, EAGAIN },
- { NFS4ERR_RESOURCE, EREMOTEIO },
- { NFS4ERR_SYMLINK, ELOOP },
- { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP },
- { NFS4ERR_DEADLOCK, EDEADLK },
- { -1, EIO }
-};
+/*
+ * Handle decode buffer overflows out-of-line.
+ */
+static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+{
+ dprintk("NFS: %s prematurely hit the end of our receive buffer. "
+ "Remaining buffer length is %tu words.\n",
+ func, xdr->end - xdr->p);
+}
-static int
-nfs_cb_stat_to_errno(int stat)
+static __be32 *xdr_encode_empty_array(__be32 *p)
{
- int i;
- for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) {
- if (nfs_cb_errtbl[i].stat == stat)
- return nfs_cb_errtbl[i].errno;
- }
- /* If we cannot translate the error, the recovery routines should
- * handle it.
- * Note: remaining NFSv4 error codes have values > 10000, so should
- * not conflict with native Linux error codes.
- */
- return stat;
+ *p++ = xdr_zero;
+ return p;
}
/*
- * XDR encode
+ * Encode/decode NFSv4 CB basic data types
+ *
+ * Basic NFSv4 callback data types are defined in section 15 of RFC
+ * 3530: "Network File System (NFS) version 4 Protocol" and section
+ * 20 of RFC 5661: "Network File System (NFS) Version 4 Minor Version
+ * 1 Protocol"
+ */
+
+/*
+ * nfs_cb_opnum4
+ *
+ * enum nfs_cb_opnum4 {
+ * OP_CB_GETATTR = 3,
+ * ...
+ * };
*/
+enum nfs_cb_opnum4 {
+ OP_CB_GETATTR = 3,
+ OP_CB_RECALL = 4,
+ OP_CB_LAYOUTRECALL = 5,
+ OP_CB_NOTIFY = 6,
+ OP_CB_PUSH_DELEG = 7,
+ OP_CB_RECALL_ANY = 8,
+ OP_CB_RECALLABLE_OBJ_AVAIL = 9,
+ OP_CB_RECALL_SLOT = 10,
+ OP_CB_SEQUENCE = 11,
+ OP_CB_WANTS_CANCELLED = 12,
+ OP_CB_NOTIFY_LOCK = 13,
+ OP_CB_NOTIFY_DEVICEID = 14,
+ OP_CB_ILLEGAL = 10044
+};
-static void
-encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
+static void encode_nfs_cb_opnum4(struct xdr_stream *xdr, enum nfs_cb_opnum4 op)
{
__be32 *p;
- RESERVE_SPACE(sizeof(stateid_t));
- WRITE32(sid->si_generation);
- WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
+ p = xdr_reserve_space(xdr, 4);
+ *p = cpu_to_be32(op);
}
-static void
-encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
+/*
+ * nfs_fh4
+ *
+ * typedef opaque nfs_fh4<NFS4_FHSIZE>;
+ */
+static void encode_nfs_fh4(struct xdr_stream *xdr, const struct knfsd_fh *fh)
{
- __be32 * p;
+ u32 length = fh->fh_size;
+ __be32 *p;
- RESERVE_SPACE(16);
- WRITE32(0); /* tag length is always 0 */
- WRITE32(hdr->minorversion);
- WRITE32(hdr->ident);
- hdr->nops_p = p;
- WRITE32(hdr->nops);
+ BUG_ON(length > NFS4_FHSIZE);
+ p = xdr_reserve_space(xdr, 4 + length);
+ xdr_encode_opaque(p, &fh->fh_base, length);
}
-static void encode_cb_nops(struct nfs4_cb_compound_hdr *hdr)
+/*
+ * stateid4
+ *
+ * struct stateid4 {
+ * uint32_t seqid;
+ * opaque other[12];
+ * };
+ */
+static void encode_stateid4(struct xdr_stream *xdr, const stateid_t *sid)
{
- *hdr->nops_p = htonl(hdr->nops);
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, NFS4_STATEID_SIZE);
+ *p++ = cpu_to_be32(sid->si_generation);
+ xdr_encode_opaque_fixed(p, &sid->si_opaque, NFS4_STATEID_OTHER_SIZE);
}
-static void
-encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp,
- struct nfs4_cb_compound_hdr *hdr)
+/*
+ * sessionid4
+ *
+ * typedef opaque sessionid4[NFS4_SESSIONID_SIZE];
+ */
+static void encode_sessionid4(struct xdr_stream *xdr,
+ const struct nfsd4_session *session)
{
__be32 *p;
- int len = dp->dl_fh.fh_size;
-
- RESERVE_SPACE(4);
- WRITE32(OP_CB_RECALL);
- encode_stateid(xdr, &dp->dl_stateid);
- RESERVE_SPACE(8 + (XDR_QUADLEN(len) << 2));
- WRITE32(0); /* truncate optimization not implemented */
- WRITE32(len);
- WRITEMEM(&dp->dl_fh.fh_base, len);
- hdr->nops++;
+
+ p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN);
+ xdr_encode_opaque_fixed(p, session->se_sessionid.data,
+ NFS4_MAX_SESSIONID_LEN);
}
-static void
-encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb,
- struct nfs4_cb_compound_hdr *hdr)
-{
- __be32 *p;
- struct nfsd4_session *ses = cb->cb_clp->cl_cb_session;
+/*
+ * nfsstat4
+ */
+static const struct {
+ int stat;
+ int errno;
+} nfs_cb_errtbl[] = {
+ { NFS4_OK, 0 },
+ { NFS4ERR_PERM, -EPERM },
+ { NFS4ERR_NOENT, -ENOENT },
+ { NFS4ERR_IO, -EIO },
+ { NFS4ERR_NXIO, -ENXIO },
+ { NFS4ERR_ACCESS, -EACCES },
+ { NFS4ERR_EXIST, -EEXIST },
+ { NFS4ERR_XDEV, -EXDEV },
+ { NFS4ERR_NOTDIR, -ENOTDIR },
+ { NFS4ERR_ISDIR, -EISDIR },
+ { NFS4ERR_INVAL, -EINVAL },
+ { NFS4ERR_FBIG, -EFBIG },
+ { NFS4ERR_NOSPC, -ENOSPC },
+ { NFS4ERR_ROFS, -EROFS },
+ { NFS4ERR_MLINK, -EMLINK },
+ { NFS4ERR_NAMETOOLONG, -ENAMETOOLONG },
+ { NFS4ERR_NOTEMPTY, -ENOTEMPTY },
+ { NFS4ERR_DQUOT, -EDQUOT },
+ { NFS4ERR_STALE, -ESTALE },
+ { NFS4ERR_BADHANDLE, -EBADHANDLE },
+ { NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
+ { NFS4ERR_NOTSUPP, -ENOTSUPP },
+ { NFS4ERR_TOOSMALL, -ETOOSMALL },
+ { NFS4ERR_SERVERFAULT, -ESERVERFAULT },
+ { NFS4ERR_BADTYPE, -EBADTYPE },
+ { NFS4ERR_LOCKED, -EAGAIN },
+ { NFS4ERR_RESOURCE, -EREMOTEIO },
+ { NFS4ERR_SYMLINK, -ELOOP },
+ { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP },
+ { NFS4ERR_DEADLOCK, -EDEADLK },
+ { -1, -EIO }
+};
- if (hdr->minorversion == 0)
- return;
+/*
+ * If we cannot translate the error, the recovery routines should
+ * handle it.
+ *
+ * Note: remaining NFSv4 error codes have values > 10000, so should
+ * not conflict with native Linux error codes.
+ */
+static int nfs_cb_stat_to_errno(int status)
+{
+ int i;
- RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20);
+ for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) {
+ if (nfs_cb_errtbl[i].stat == status)
+ return nfs_cb_errtbl[i].errno;
+ }
- WRITE32(OP_CB_SEQUENCE);
- WRITEMEM(ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN);
- WRITE32(ses->se_cb_seq_nr);
- WRITE32(0); /* slotid, always 0 */
- WRITE32(0); /* highest slotid always 0 */
- WRITE32(0); /* cachethis always 0 */
- WRITE32(0); /* FIXME: support referring_call_lists */
- hdr->nops++;
+ dprintk("NFSD: Unrecognized NFS CB status value: %u\n", status);
+ return -status;
}
-static int
-nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
+static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
+ enum nfsstat4 *status)
{
- struct xdr_stream xdrs, *xdr = &xdrs;
+ __be32 *p;
+ u32 op;
- xdr_init_encode(&xdrs, &req->rq_snd_buf, p);
- RESERVE_SPACE(0);
+ p = xdr_inline_decode(xdr, 4 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ op = be32_to_cpup(p++);
+ if (unlikely(op != expected))
+ goto out_unexpected;
+ *status = be32_to_cpup(p);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+out_unexpected:
+ dprintk("NFSD: Callback server returned operation %d but "
+ "we issued a request for %d\n", op, expected);
+ return -EIO;
}
-static int
-nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p,
- struct nfsd4_callback *cb)
+/*
+ * CB_COMPOUND4args
+ *
+ * struct CB_COMPOUND4args {
+ * utf8str_cs tag;
+ * uint32_t minorversion;
+ * uint32_t callback_ident;
+ * nfs_cb_argop4 argarray<>;
+ * };
+*/
+static void encode_cb_compound4args(struct xdr_stream *xdr,
+ struct nfs4_cb_compound_hdr *hdr)
{
- struct xdr_stream xdr;
- struct nfs4_delegation *args = cb->cb_op;
- struct nfs4_cb_compound_hdr hdr = {
- .ident = cb->cb_clp->cl_cb_ident,
- .minorversion = cb->cb_minorversion,
- };
+ __be32 * p;
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_cb_compound_hdr(&xdr, &hdr);
- encode_cb_sequence(&xdr, cb, &hdr);
- encode_cb_recall(&xdr, args, &hdr);
- encode_cb_nops(&hdr);
+ p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4);
+ p = xdr_encode_empty_array(p); /* empty tag */
+ *p++ = cpu_to_be32(hdr->minorversion);
+ *p++ = cpu_to_be32(hdr->ident);
+
+ hdr->nops_p = p;
+ *p = cpu_to_be32(hdr->nops); /* argarray element count */
+}
+
+/*
+ * Update argarray element count
+ */
+static void encode_cb_nops(struct nfs4_cb_compound_hdr *hdr)
+{
+ BUG_ON(hdr->nops > NFS4_MAX_BACK_CHANNEL_OPS);
+ *hdr->nops_p = cpu_to_be32(hdr->nops);
+}
+
+/*
+ * CB_COMPOUND4res
+ *
+ * struct CB_COMPOUND4res {
+ * nfsstat4 status;
+ * utf8str_cs tag;
+ * nfs_cb_resop4 resarray<>;
+ * };
+ */
+static int decode_cb_compound4res(struct xdr_stream *xdr,
+ struct nfs4_cb_compound_hdr *hdr)
+{
+ u32 length;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ hdr->status = be32_to_cpup(p++);
+ /* Ignore the tag */
+ length = be32_to_cpup(p++);
+ p = xdr_inline_decode(xdr, length + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ hdr->nops = be32_to_cpup(p);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
+/*
+ * CB_RECALL4args
+ *
+ * struct CB_RECALL4args {
+ * stateid4 stateid;
+ * bool truncate;
+ * nfs_fh4 fh;
+ * };
+ */
+static void encode_cb_recall4args(struct xdr_stream *xdr,
+ const struct nfs4_delegation *dp,
+ struct nfs4_cb_compound_hdr *hdr)
+{
+ __be32 *p;
+
+ encode_nfs_cb_opnum4(xdr, OP_CB_RECALL);
+ encode_stateid4(xdr, &dp->dl_stateid);
+
+ p = xdr_reserve_space(xdr, 4);
+ *p++ = xdr_zero; /* truncate */
-static int
-decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){
- __be32 *p;
- u32 taglen;
+ encode_nfs_fh4(xdr, &dp->dl_fh);
- READ_BUF(8);
- READ32(hdr->status);
- /* We've got no use for the tag; ignore it: */
- READ32(taglen);
- READ_BUF(taglen + 4);
- p += XDR_QUADLEN(taglen);
- READ32(hdr->nops);
- return 0;
+ hdr->nops++;
}
-static int
-decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
+/*
+ * CB_SEQUENCE4args
+ *
+ * struct CB_SEQUENCE4args {
+ * sessionid4 csa_sessionid;
+ * sequenceid4 csa_sequenceid;
+ * slotid4 csa_slotid;
+ * slotid4 csa_highest_slotid;
+ * bool csa_cachethis;
+ * referring_call_list4 csa_referring_call_lists<>;
+ * };
+ */
+static void encode_cb_sequence4args(struct xdr_stream *xdr,
+ const struct nfsd4_callback *cb,
+ struct nfs4_cb_compound_hdr *hdr)
{
+ struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
__be32 *p;
- u32 op;
- int32_t nfserr;
-
- READ_BUF(8);
- READ32(op);
- if (op != expected) {
- dprintk("NFSD: decode_cb_op_hdr: Callback server returned "
- " operation %d but we issued a request for %d\n",
- op, expected);
- return -EIO;
- }
- READ32(nfserr);
- if (nfserr != NFS_OK)
- return -nfs_cb_stat_to_errno(nfserr);
- return 0;
+
+ if (hdr->minorversion == 0)
+ return;
+
+ encode_nfs_cb_opnum4(xdr, OP_CB_SEQUENCE);
+ encode_sessionid4(xdr, session);
+
+ p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4 + 4);
+ *p++ = cpu_to_be32(session->se_cb_seq_nr); /* csa_sequenceid */
+ *p++ = xdr_zero; /* csa_slotid */
+ *p++ = xdr_zero; /* csa_highest_slotid */
+ *p++ = xdr_zero; /* csa_cachethis */
+ xdr_encode_empty_array(p); /* csa_referring_call_lists */
+
+ hdr->nops++;
}
/*
+ * CB_SEQUENCE4resok
+ *
+ * struct CB_SEQUENCE4resok {
+ * sessionid4 csr_sessionid;
+ * sequenceid4 csr_sequenceid;
+ * slotid4 csr_slotid;
+ * slotid4 csr_highest_slotid;
+ * slotid4 csr_target_highest_slotid;
+ * };
+ *
+ * union CB_SEQUENCE4res switch (nfsstat4 csr_status) {
+ * case NFS4_OK:
+ * CB_SEQUENCE4resok csr_resok4;
+ * default:
+ * void;
+ * };
+ *
* Our current back channel implmentation supports a single backchannel
* with a single slot.
*/
-static int
-decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb,
- struct rpc_rqst *rqstp)
+static int decode_cb_sequence4resok(struct xdr_stream *xdr,
+ struct nfsd4_callback *cb)
{
- struct nfsd4_session *ses = cb->cb_clp->cl_cb_session;
+ struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
struct nfs4_sessionid id;
int status;
- u32 dummy;
__be32 *p;
+ u32 dummy;
- if (cb->cb_minorversion == 0)
- return 0;
-
- status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE);
- if (status)
- return status;
+ status = -ESERVERFAULT;
/*
* If the server returns different values for sessionID, slotID or
* sequence number, the server is looney tunes.
*/
- status = -ESERVERFAULT;
-
- READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
+ p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
- p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
- if (memcmp(id.data, ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) {
- dprintk("%s Invalid session id\n", __func__);
+ if (memcmp(id.data, session->se_sessionid.data,
+ NFS4_MAX_SESSIONID_LEN) != 0) {
+ dprintk("NFS: %s Invalid session id\n", __func__);
goto out;
}
- READ32(dummy);
- if (dummy != ses->se_cb_seq_nr) {
- dprintk("%s Invalid sequence number\n", __func__);
+ p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
+
+ dummy = be32_to_cpup(p++);
+ if (dummy != session->se_cb_seq_nr) {
+ dprintk("NFS: %s Invalid sequence number\n", __func__);
goto out;
}
- READ32(dummy); /* slotid must be 0 */
+
+ dummy = be32_to_cpup(p++);
if (dummy != 0) {
- dprintk("%s Invalid slotid\n", __func__);
+ dprintk("NFS: %s Invalid slotid\n", __func__);
goto out;
}
- /* FIXME: process highest slotid and target highest slotid */
+
+ /*
+ * FIXME: process highest slotid and target highest slotid
+ */
status = 0;
out:
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
+static int decode_cb_sequence4res(struct xdr_stream *xdr,
+ struct nfsd4_callback *cb)
+{
+ enum nfsstat4 nfserr;
+ int status;
+
+ if (cb->cb_minorversion == 0)
+ return 0;
+
+ status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &nfserr);
+ if (unlikely(status))
+ goto out;
+ if (unlikely(nfserr != NFS4_OK))
+ goto out_default;
+ status = decode_cb_sequence4resok(xdr, cb);
+out:
+ return status;
+out_default:
+ return nfs_cb_stat_to_errno(status);
+}
-static int
-nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
+/*
+ * NFSv4.0 and NFSv4.1 XDR encode functions
+ *
+ * NFSv4.0 callback argument types are defined in section 15 of RFC
+ * 3530: "Network File System (NFS) version 4 Protocol" and section 20
+ * of RFC 5661: "Network File System (NFS) Version 4 Minor Version 1
+ * Protocol".
+ */
+
+/*
+ * NB: Without this zero space reservation, callbacks over krb5p fail
+ */
+static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
+ void *__unused)
+{
+ xdr_reserve_space(xdr, 0);
+}
+
+/*
+ * 20.2. Operation 4: CB_RECALL - Recall a Delegation
+ */
+static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfsd4_callback *cb)
+{
+ const struct nfs4_delegation *args = cb->cb_op;
+ struct nfs4_cb_compound_hdr hdr = {
+ .ident = cb->cb_clp->cl_cb_ident,
+ .minorversion = cb->cb_minorversion,
+ };
+
+ encode_cb_compound4args(xdr, &hdr);
+ encode_cb_sequence4args(xdr, cb, &hdr);
+ encode_cb_recall4args(xdr, args, &hdr);
+ encode_cb_nops(&hdr);
+}
+
+
+/*
+ * NFSv4.0 and NFSv4.1 XDR decode functions
+ *
+ * NFSv4.0 callback result types are defined in section 15 of RFC
+ * 3530: "Network File System (NFS) version 4 Protocol" and section 20
+ * of RFC 5661: "Network File System (NFS) Version 4 Minor Version 1
+ * Protocol".
+ */
+
+static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
+ void *__unused)
{
return 0;
}
-static int
-nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
- struct nfsd4_callback *cb)
+/*
+ * 20.2. Operation 4: CB_RECALL - Recall a Delegation
+ */
+static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfsd4_callback *cb)
{
- struct xdr_stream xdr;
struct nfs4_cb_compound_hdr hdr;
+ enum nfsstat4 nfserr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_cb_compound_hdr(&xdr, &hdr);
- if (status)
+ status = decode_cb_compound4res(xdr, &hdr);
+ if (unlikely(status))
goto out;
- if (cb) {
- status = decode_cb_sequence(&xdr, cb, rqstp);
- if (status)
+
+ if (cb != NULL) {
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status))
goto out;
}
- status = decode_cb_op_hdr(&xdr, OP_CB_RECALL);
+
+ status = decode_cb_op_status(xdr, OP_CB_RECALL, &nfserr);
+ if (unlikely(status))
+ goto out;
+ if (unlikely(nfserr != NFS4_OK))
+ goto out_default;
out:
return status;
+out_default:
+ return nfs_cb_stat_to_errno(status);
}
/*
* RPC procedure tables
*/
-#define PROC(proc, call, argtype, restype) \
-[NFSPROC4_CLNT_##proc] = { \
- .p_proc = NFSPROC4_CB_##call, \
- .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
- .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
- .p_arglen = NFS4_##argtype##_sz, \
- .p_replen = NFS4_##restype##_sz, \
- .p_statidx = NFSPROC4_CB_##call, \
- .p_name = #proc, \
-}
-
-static struct rpc_procinfo nfs4_cb_procedures[] = {
- PROC(CB_NULL, NULL, enc_cb_null, dec_cb_null),
- PROC(CB_RECALL, COMPOUND, enc_cb_recall, dec_cb_recall),
+#define PROC(proc, call, argtype, restype) \
+[NFSPROC4_CLNT_##proc] = { \
+ .p_proc = NFSPROC4_CB_##call, \
+ .p_encode = (kxdreproc_t)nfs4_xdr_enc_##argtype, \
+ .p_decode = (kxdrdproc_t)nfs4_xdr_dec_##restype, \
+ .p_arglen = NFS4_enc_##argtype##_sz, \
+ .p_replen = NFS4_dec_##restype##_sz, \
+ .p_statidx = NFSPROC4_CB_##call, \
+ .p_name = #proc, \
+}
+
+static struct rpc_procinfo nfs4_cb_procedures[] = {
+ PROC(CB_NULL, NULL, cb_null, cb_null),
+ PROC(CB_RECALL, COMPOUND, cb_recall, cb_recall),
};
-static struct rpc_version nfs_cb_version4 = {
+static struct rpc_version nfs_cb_version4 = {
/*
* Note on the callback rpc program version number: despite language in rfc
* 5661 section 18.36.3 requiring servers to use 4 in this field, the
@@ -440,29 +598,29 @@ static struct rpc_version nfs_cb_version4 = {
* in practice that appears to be what implementations use. The section
* 18.36.3 language is expected to be fixed in an erratum.
*/
- .number = 1,
- .nrprocs = ARRAY_SIZE(nfs4_cb_procedures),
- .procs = nfs4_cb_procedures
+ .number = 1,
+ .nrprocs = ARRAY_SIZE(nfs4_cb_procedures),
+ .procs = nfs4_cb_procedures
};
-static struct rpc_version * nfs_cb_version[] = {
+static struct rpc_version *nfs_cb_version[] = {
&nfs_cb_version4,
};
static struct rpc_program cb_program;
static struct rpc_stat cb_stats = {
- .program = &cb_program
+ .program = &cb_program
};
#define NFS4_CALLBACK 0x40000000
static struct rpc_program cb_program = {
- .name = "nfs4_cb",
- .number = NFS4_CALLBACK,
- .nrvers = ARRAY_SIZE(nfs_cb_version),
- .version = nfs_cb_version,
- .stats = &cb_stats,
- .pipe_dir_name = "/nfsd4_cb",
+ .name = "nfs4_cb",
+ .number = NFS4_CALLBACK,
+ .nrvers = ARRAY_SIZE(nfs_cb_version),
+ .version = nfs_cb_version,
+ .stats = &cb_stats,
+ .pipe_dir_name = "/nfsd4_cb",
};
static int max_cb_time(void)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 56347e0..fbd18c3 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -673,16 +673,17 @@ static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
spin_unlock(&clp->cl_lock);
}
-static void nfsd4_register_conn(struct nfsd4_conn *conn)
+static int nfsd4_register_conn(struct nfsd4_conn *conn)
{
conn->cn_xpt_user.callback = nfsd4_conn_lost;
- register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
+ return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
}
static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
{
struct nfsd4_conn *conn;
u32 flags = NFS4_CDFC4_FORE;
+ int ret;
if (ses->se_flags & SESSION4_BACK_CHAN)
flags |= NFS4_CDFC4_BACK;
@@ -690,7 +691,10 @@ static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
if (!conn)
return nfserr_jukebox;
nfsd4_hash_conn(conn, ses);
- nfsd4_register_conn(conn);
+ ret = nfsd4_register_conn(conn);
+ if (ret)
+ /* oops; xprt is already down: */
+ nfsd4_conn_lost(&conn->cn_xpt_user);
return nfs_ok;
}
@@ -1644,6 +1648,7 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi
{
struct nfs4_client *clp = ses->se_client;
struct nfsd4_conn *c;
+ int ret;
spin_lock(&clp->cl_lock);
c = __nfsd4_find_conn(new->cn_xprt, ses);
@@ -1654,7 +1659,10 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi
}
__nfsd4_hash_conn(new, ses);
spin_unlock(&clp->cl_lock);
- nfsd4_register_conn(new);
+ ret = nfsd4_register_conn(new);
+ if (ret)
+ /* oops; xprt is already down: */
+ nfsd4_conn_lost(&new->cn_xpt_user);
return;
}
@@ -2254,7 +2262,7 @@ nfs4_file_downgrade(struct nfs4_file *fp, unsigned int share_access)
* Spawn a thread to perform a recall on the delegation represented
* by the lease (file_lock)
*
- * Called from break_lease() with lock_kernel() held.
+ * Called from break_lease() with lock_flocks() held.
* Note: we assume break_lease will only call this *once* for any given
* lease.
*/
@@ -2278,7 +2286,7 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
spin_unlock(&recall_lock);
- /* only place dl_time is set. protected by lock_kernel*/
+ /* only place dl_time is set. protected by lock_flocks*/
dp->dl_time = get_seconds();
/*
@@ -2295,7 +2303,7 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
/*
* The file_lock is being reapd.
*
- * Called by locks_free_lock() with lock_kernel() held.
+ * Called by locks_free_lock() with lock_flocks() held.
*/
static
void nfsd_release_deleg_cb(struct file_lock *fl)
@@ -2310,23 +2318,7 @@ void nfsd_release_deleg_cb(struct file_lock *fl)
}
/*
- * Set the delegation file_lock back pointer.
- *
- * Called from setlease() with lock_kernel() held.
- */
-static
-void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl)
-{
- struct nfs4_delegation *dp = (struct nfs4_delegation *)new->fl_owner;
-
- dprintk("NFSD: nfsd_copy_lock_deleg_cb: new fl %p dp %p\n", new, dp);
- if (!dp)
- return;
- dp->dl_flock = new;
-}
-
-/*
- * Called from setlease() with lock_kernel() held
+ * Called from setlease() with lock_flocks() held
*/
static
int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try)
@@ -2355,7 +2347,6 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
static const struct lock_manager_operations nfsd_lease_mng_ops = {
.fl_break = nfsd_break_deleg_cb,
.fl_release_private = nfsd_release_deleg_cb,
- .fl_copy_lock = nfsd_copy_lock_deleg_cb,
.fl_mylease = nfsd_same_client_deleg_cb,
.fl_change = nfsd_change_deleg_cb,
};
@@ -2661,12 +2652,15 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
fl->fl_file = find_readable_file(stp->st_file);
BUG_ON(!fl->fl_file);
fl->fl_pid = current->tgid;
+ dp->dl_flock = fl;
/* vfs_setlease checks to see if delegation should be handed out.
* the lock_manager callbacks fl_mylease and fl_change are used
*/
if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) {
dprintk("NFSD: setlease failed [%d], no delegation\n", status);
+ dp->dl_flock = NULL;
+ locks_free_lock(fl);
unhash_delegation(dp);
flag = NFS4_OPEN_DELEGATE_NONE;
goto out;
@@ -4342,7 +4336,7 @@ __nfs4_state_shutdown(void)
void
nfs4_state_shutdown(void)
{
- cancel_rearming_delayed_workqueue(laundry_wq, &laundromat_work);
+ cancel_delayed_work_sync(&laundromat_work);
destroy_workqueue(laundry_wq);
locks_end_grace(&nfsd4_manager);
nfs4_lock_state();
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index d6dc3f6..4514ebb 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1405,16 +1405,16 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
return simple_fill_super(sb, 0x6e667364, nfsd_files);
}
-static int nfsd_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *nfsd_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_single(fs_type, flags, data, nfsd_fill_super, mnt);
+ return mount_single(fs_type, flags, data, nfsd_fill_super);
}
static struct file_system_type nfsd_fs_type = {
.owner = THIS_MODULE,
.name = "nfsd",
- .get_sb = nfsd_get_sb,
+ .mount = nfsd_mount,
.kill_sb = kill_litter_super,
};
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 184938f..3a35902 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1756,8 +1756,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
goto out_dput_new;
if (svc_msnfs(ffhp) &&
- ((atomic_read(&odentry->d_count) > 1)
- || (atomic_read(&ndentry->d_count) > 1))) {
+ ((odentry->d_count > 1) || (ndentry->d_count > 1))) {
host_err = -EPERM;
goto out_dput_new;
}
@@ -1843,7 +1842,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
if (type != S_IFDIR) { /* It's UNLINK */
#ifdef MSNFS
if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
- (atomic_read(&rdentry->d_count) > 1)) {
+ (rdentry->d_count > 1)) {
host_err = -EPERM;
} else
#endif
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 4d476ff..60fce3d 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -484,18 +484,17 @@ static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
static inline void
set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
{
- BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved);
- cinfo->atomic = 1;
+ BUG_ON(!fhp->fh_pre_saved);
+ cinfo->atomic = fhp->fh_post_saved;
cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode);
- if (cinfo->change_supported) {
- cinfo->before_change = fhp->fh_pre_change;
- cinfo->after_change = fhp->fh_post_change;
- } else {
- cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
- cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
- cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
- cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
- }
+
+ cinfo->before_change = fhp->fh_pre_change;
+ cinfo->after_change = fhp->fh_post_change;
+ cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
+ cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
+ cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
+ cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
+
}
int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 8b782b0..3ee67c6 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -35,7 +35,20 @@
struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
{
- return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode));
+ return NILFS_I_NILFS(bmap->b_inode)->ns_dat;
+}
+
+static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap,
+ const char *fname, int err)
+{
+ struct inode *inode = bmap->b_inode;
+
+ if (err == -EINVAL) {
+ nilfs_error(inode->i_sb, fname,
+ "broken bmap (inode number=%lu)\n", inode->i_ino);
+ err = -EIO;
+ }
+ return err;
}
/**
@@ -66,8 +79,10 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level,
down_read(&bmap->b_sem);
ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp);
- if (ret < 0)
+ if (ret < 0) {
+ ret = nilfs_bmap_convert_error(bmap, __func__, ret);
goto out;
+ }
if (NILFS_BMAP_USE_VBN(bmap)) {
ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp,
&blocknr);
@@ -88,7 +103,8 @@ int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp,
down_read(&bmap->b_sem);
ret = bmap->b_ops->bop_lookup_contig(bmap, key, ptrp, maxblocks);
up_read(&bmap->b_sem);
- return ret;
+
+ return nilfs_bmap_convert_error(bmap, __func__, ret);
}
static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
@@ -144,7 +160,8 @@ int nilfs_bmap_insert(struct nilfs_bmap *bmap,
down_write(&bmap->b_sem);
ret = nilfs_bmap_do_insert(bmap, key, rec);
up_write(&bmap->b_sem);
- return ret;
+
+ return nilfs_bmap_convert_error(bmap, __func__, ret);
}
static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key)
@@ -180,9 +197,12 @@ int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key)
down_read(&bmap->b_sem);
ret = bmap->b_ops->bop_last_key(bmap, &lastkey);
- if (!ret)
- *key = lastkey;
up_read(&bmap->b_sem);
+
+ if (ret < 0)
+ ret = nilfs_bmap_convert_error(bmap, __func__, ret);
+ else
+ *key = lastkey;
return ret;
}
@@ -210,7 +230,8 @@ int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key)
down_write(&bmap->b_sem);
ret = nilfs_bmap_do_delete(bmap, key);
up_write(&bmap->b_sem);
- return ret;
+
+ return nilfs_bmap_convert_error(bmap, __func__, ret);
}
static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key)
@@ -261,7 +282,8 @@ int nilfs_bmap_truncate(struct nilfs_bmap *bmap, unsigned long key)
down_write(&bmap->b_sem);
ret = nilfs_bmap_do_truncate(bmap, key);
up_write(&bmap->b_sem);
- return ret;
+
+ return nilfs_bmap_convert_error(bmap, __func__, ret);
}
/**
@@ -300,7 +322,8 @@ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh)
down_write(&bmap->b_sem);
ret = bmap->b_ops->bop_propagate(bmap, bh);
up_write(&bmap->b_sem);
- return ret;
+
+ return nilfs_bmap_convert_error(bmap, __func__, ret);
}
/**
@@ -344,7 +367,8 @@ int nilfs_bmap_assign(struct nilfs_bmap *bmap,
down_write(&bmap->b_sem);
ret = bmap->b_ops->bop_assign(bmap, bh, blocknr, binfo);
up_write(&bmap->b_sem);
- return ret;
+
+ return nilfs_bmap_convert_error(bmap, __func__, ret);
}
/**
@@ -373,7 +397,8 @@ int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level)
down_write(&bmap->b_sem);
ret = bmap->b_ops->bop_mark(bmap, key, level);
up_write(&bmap->b_sem);
- return ret;
+
+ return nilfs_bmap_convert_error(bmap, __func__, ret);
}
/**
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 5115814c..388e9e8 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -104,8 +104,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
if (pblocknr == 0) {
pblocknr = blocknr;
if (inode->i_ino != NILFS_DAT_INO) {
- struct inode *dat =
- nilfs_dat_inode(NILFS_I_NILFS(inode));
+ struct inode *dat = NILFS_I_NILFS(inode)->ns_dat;
/* blocknr is a virtual block number */
err = nilfs_dat_translate(dat, blocknr, &pblocknr);
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 49c844d..59e5fe7 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -335,7 +335,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
* the device at this point.
*
* To prevent nilfs_dat_translate() from returning the
- * uncommited block number, this makes a copy of the entry
+ * uncommitted block number, this makes a copy of the entry
* buffer and redirects nilfs_dat_translate() to the copy.
*/
if (!buffer_nilfs_redirected(entry_bh)) {
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index cb003c8..9d45773 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -91,7 +91,6 @@ static void nilfs_commit_chunk(struct page *page,
unsigned from, unsigned to)
{
struct inode *dir = mapping->host;
- struct nilfs_sb_info *sbi = NILFS_SB(dir->i_sb);
loff_t pos = page_offset(page) + from;
unsigned len = to - from;
unsigned nr_dirty, copied;
@@ -103,7 +102,7 @@ static void nilfs_commit_chunk(struct page *page,
i_size_write(dir, pos + copied);
if (IS_DIRSYNC(dir))
nilfs_set_transaction_flag(NILFS_TI_SYNC);
- err = nilfs_set_file_dirty(sbi, dir, nr_dirty);
+ err = nilfs_set_file_dirty(dir, nr_dirty);
WARN_ON(err); /* do not happen */
unlock_page(page);
}
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index c9a30d7..2f560c9 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -155,6 +155,7 @@ const struct inode_operations nilfs_file_inode_operations = {
.truncate = nilfs_truncate,
.setattr = nilfs_setattr,
.permission = nilfs_permission,
+ .fiemap = nilfs_fiemap,
};
/* end of file */
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 33ad25d..caf9a6a 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -176,7 +176,6 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
int nilfs_init_gcinode(struct inode *inode)
{
struct nilfs_inode_info *ii = NILFS_I(inode);
- struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
inode->i_mode = S_IFREG;
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
@@ -186,14 +185,6 @@ int nilfs_init_gcinode(struct inode *inode)
ii->i_flags = 0;
nilfs_bmap_init_gc(ii->i_bmap);
- /*
- * Add the inode to GC inode list. Garbage Collection
- * is serialized and no two processes manipulate the
- * list simultaneously.
- */
- igrab(inode);
- list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes);
-
return 0;
}
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index 9f8a2da..bfc73d3 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -149,14 +149,9 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino,
}
err = nilfs_palloc_get_entry_block(ifile, ino, 0, out_bh);
- if (unlikely(err)) {
- if (err == -EINVAL)
- nilfs_error(sb, __func__, "ifile is broken");
- else
- nilfs_warning(sb, __func__,
- "unable to read inode: %lu",
- (unsigned long) ino);
- }
+ if (unlikely(err))
+ nilfs_warning(sb, __func__, "unable to read inode: %lu",
+ (unsigned long) ino);
return err;
}
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 71d4bc8..2fd440d 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -58,7 +58,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
struct nilfs_inode_info *ii = NILFS_I(inode);
__u64 blknum = 0;
int err = 0, ret;
- struct inode *dat = nilfs_dat_inode(NILFS_I_NILFS(inode));
+ struct inode *dat = NILFS_I_NILFS(inode)->ns_dat;
unsigned maxblocks = bh_result->b_size >> inode->i_blkbits;
down_read(&NILFS_MDT(dat)->mi_sem);
@@ -96,11 +96,6 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
inode->i_ino,
(unsigned long long)blkoff);
err = 0;
- } else if (err == -EINVAL) {
- nilfs_error(inode->i_sb, __func__,
- "broken bmap (inode=%lu)\n",
- inode->i_ino);
- err = -EIO;
}
nilfs_transaction_abort(inode->i_sb);
goto out;
@@ -109,6 +104,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
nilfs_transaction_commit(inode->i_sb); /* never fails */
/* Error handling should be detailed */
set_buffer_new(bh_result);
+ set_buffer_delay(bh_result);
map_bh(bh_result, inode->i_sb, 0); /* dbn must be changed
to proper value */
} else if (ret == -ENOENT) {
@@ -185,10 +181,9 @@ static int nilfs_set_page_dirty(struct page *page)
if (ret) {
struct inode *inode = page->mapping->host;
- struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits);
- nilfs_set_file_dirty(sbi, inode, nr_dirty);
+ nilfs_set_file_dirty(inode, nr_dirty);
}
return ret;
}
@@ -229,7 +224,7 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping,
start + copied);
copied = generic_write_end(file, mapping, pos, len, copied, page,
fsdata);
- nilfs_set_file_dirty(NILFS_SB(inode->i_sb), inode, nr_dirty);
+ nilfs_set_file_dirty(inode, nr_dirty);
err = nilfs_transaction_commit(inode->i_sb);
return err ? : copied;
}
@@ -425,13 +420,12 @@ static int __nilfs_read_inode(struct super_block *sb,
struct nilfs_root *root, unsigned long ino,
struct inode *inode)
{
- struct nilfs_sb_info *sbi = NILFS_SB(sb);
- struct inode *dat = nilfs_dat_inode(sbi->s_nilfs);
+ struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs;
struct buffer_head *bh;
struct nilfs_inode *raw_inode;
int err;
- down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
+ down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh);
if (unlikely(err))
goto bad_inode;
@@ -461,7 +455,7 @@ static int __nilfs_read_inode(struct super_block *sb,
}
nilfs_ifile_unmap_inode(root->ifile, ino, bh);
brelse(bh);
- up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
+ up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
nilfs_set_inode_flags(inode);
return 0;
@@ -470,7 +464,7 @@ static int __nilfs_read_inode(struct super_block *sb,
brelse(bh);
bad_inode:
- up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
+ up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
return err;
}
@@ -629,7 +623,7 @@ static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
if (!test_bit(NILFS_I_BMAP, &ii->i_state))
return;
- repeat:
+repeat:
ret = nilfs_bmap_last_key(ii->i_bmap, &b);
if (ret == -ENOENT)
return;
@@ -646,14 +640,10 @@ static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
nilfs_bmap_truncate(ii->i_bmap, b) == 0))
goto repeat;
- failed:
- if (ret == -EINVAL)
- nilfs_error(ii->vfs_inode.i_sb, __func__,
- "bmap is broken (ino=%lu)", ii->vfs_inode.i_ino);
- else
- nilfs_warning(ii->vfs_inode.i_sb, __func__,
- "failed to truncate bmap (ino=%lu, err=%d)",
- ii->vfs_inode.i_ino, ret);
+failed:
+ nilfs_warning(ii->vfs_inode.i_sb, __func__,
+ "failed to truncate bmap (ino=%lu, err=%d)",
+ ii->vfs_inode.i_ino, ret);
}
void nilfs_truncate(struct inode *inode)
@@ -682,7 +672,7 @@ void nilfs_truncate(struct inode *inode)
nilfs_set_transaction_flag(NILFS_TI_SYNC);
nilfs_mark_inode_dirty(inode);
- nilfs_set_file_dirty(NILFS_SB(sb), inode, 0);
+ nilfs_set_file_dirty(inode, 0);
nilfs_transaction_commit(sb);
/* May construct a logical segment and may fail in sync mode.
But truncate has no return value. */
@@ -785,20 +775,24 @@ out_err:
return err;
}
-int nilfs_permission(struct inode *inode, int mask)
+int nilfs_permission(struct inode *inode, int mask, unsigned int flags)
{
- struct nilfs_root *root = NILFS_I(inode)->i_root;
+ struct nilfs_root *root;
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
+ root = NILFS_I(inode)->i_root;
if ((mask & MAY_WRITE) && root &&
root->cno != NILFS_CPTREE_CURRENT_CNO)
return -EROFS; /* snapshot is not writable */
- return generic_permission(inode, mask, NULL);
+ return generic_permission(inode, mask, flags, NULL);
}
-int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode,
- struct buffer_head **pbh)
+int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
{
+ struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
struct nilfs_inode_info *ii = NILFS_I(inode);
int err;
@@ -839,9 +833,9 @@ int nilfs_inode_dirty(struct inode *inode)
return ret;
}
-int nilfs_set_file_dirty(struct nilfs_sb_info *sbi, struct inode *inode,
- unsigned nr_dirty)
+int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
{
+ struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
struct nilfs_inode_info *ii = NILFS_I(inode);
atomic_add(nr_dirty, &sbi->s_nilfs->ns_ndirtyblks);
@@ -874,11 +868,10 @@ int nilfs_set_file_dirty(struct nilfs_sb_info *sbi, struct inode *inode,
int nilfs_mark_inode_dirty(struct inode *inode)
{
- struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
struct buffer_head *ibh;
int err;
- err = nilfs_load_inode_block(sbi, inode, &ibh);
+ err = nilfs_load_inode_block(inode, &ibh);
if (unlikely(err)) {
nilfs_warning(inode->i_sb, __func__,
"failed to reget inode block.\n");
@@ -920,3 +913,134 @@ void nilfs_dirty_inode(struct inode *inode)
nilfs_mark_inode_dirty(inode);
nilfs_transaction_commit(inode->i_sb); /* never fails */
}
+
+int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ __u64 start, __u64 len)
+{
+ struct the_nilfs *nilfs = NILFS_I_NILFS(inode);
+ __u64 logical = 0, phys = 0, size = 0;
+ __u32 flags = 0;
+ loff_t isize;
+ sector_t blkoff, end_blkoff;
+ sector_t delalloc_blkoff;
+ unsigned long delalloc_blklen;
+ unsigned int blkbits = inode->i_blkbits;
+ int ret, n;
+
+ ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+ if (ret)
+ return ret;
+
+ mutex_lock(&inode->i_mutex);
+
+ isize = i_size_read(inode);
+
+ blkoff = start >> blkbits;
+ end_blkoff = (start + len - 1) >> blkbits;
+
+ delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff,
+ &delalloc_blkoff);
+
+ do {
+ __u64 blkphy;
+ unsigned int maxblocks;
+
+ if (delalloc_blklen && blkoff == delalloc_blkoff) {
+ if (size) {
+ /* End of the current extent */
+ ret = fiemap_fill_next_extent(
+ fieinfo, logical, phys, size, flags);
+ if (ret)
+ break;
+ }
+ if (blkoff > end_blkoff)
+ break;
+
+ flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC;
+ logical = blkoff << blkbits;
+ phys = 0;
+ size = delalloc_blklen << blkbits;
+
+ blkoff = delalloc_blkoff + delalloc_blklen;
+ delalloc_blklen = nilfs_find_uncommitted_extent(
+ inode, blkoff, &delalloc_blkoff);
+ continue;
+ }
+
+ /*
+ * Limit the number of blocks that we look up so as
+ * not to get into the next delayed allocation extent.
+ */
+ maxblocks = INT_MAX;
+ if (delalloc_blklen)
+ maxblocks = min_t(sector_t, delalloc_blkoff - blkoff,
+ maxblocks);
+ blkphy = 0;
+
+ down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+ n = nilfs_bmap_lookup_contig(
+ NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks);
+ up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+
+ if (n < 0) {
+ int past_eof;
+
+ if (unlikely(n != -ENOENT))
+ break; /* error */
+
+ /* HOLE */
+ blkoff++;
+ past_eof = ((blkoff << blkbits) >= isize);
+
+ if (size) {
+ /* End of the current extent */
+
+ if (past_eof)
+ flags |= FIEMAP_EXTENT_LAST;
+
+ ret = fiemap_fill_next_extent(
+ fieinfo, logical, phys, size, flags);
+ if (ret)
+ break;
+ size = 0;
+ }
+ if (blkoff > end_blkoff || past_eof)
+ break;
+ } else {
+ if (size) {
+ if (phys && blkphy << blkbits == phys + size) {
+ /* The current extent goes on */
+ size += n << blkbits;
+ } else {
+ /* Terminate the current extent */
+ ret = fiemap_fill_next_extent(
+ fieinfo, logical, phys, size,
+ flags);
+ if (ret || blkoff > end_blkoff)
+ break;
+
+ /* Start another extent */
+ flags = FIEMAP_EXTENT_MERGED;
+ logical = blkoff << blkbits;
+ phys = blkphy << blkbits;
+ size = n << blkbits;
+ }
+ } else {
+ /* Start a new extent */
+ flags = FIEMAP_EXTENT_MERGED;
+ logical = blkoff << blkbits;
+ phys = blkphy << blkbits;
+ size = n << blkbits;
+ }
+ blkoff += n;
+ }
+ cond_resched();
+ } while (true);
+
+ /* If ret is 1 then we just hit the end of the extent array */
+ if (ret == 1)
+ ret = 0;
+
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+}
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 3e90f86..4967389 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -233,7 +233,7 @@ nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
int ret;
down_read(&nilfs->ns_segctor_sem);
- ret = nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, size, nmembs);
+ ret = nilfs_dat_get_vinfo(nilfs->ns_dat, buf, size, nmembs);
up_read(&nilfs->ns_segctor_sem);
return ret;
}
@@ -242,8 +242,7 @@ static ssize_t
nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags,
void *buf, size_t size, size_t nmembs)
{
- struct inode *dat = nilfs_dat_inode(nilfs);
- struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap;
+ struct nilfs_bmap *bmap = NILFS_I(nilfs->ns_dat)->i_bmap;
struct nilfs_bdesc *bdescs = buf;
int ret, i;
@@ -337,6 +336,7 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
struct nilfs_argv *argv, void *buf)
{
size_t nmembs = argv->v_nmembs;
+ struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs;
struct inode *inode;
struct nilfs_vdesc *vdesc;
struct buffer_head *bh, *n;
@@ -349,10 +349,21 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
ino = vdesc->vd_ino;
cno = vdesc->vd_cno;
inode = nilfs_iget_for_gc(sb, ino, cno);
- if (unlikely(inode == NULL)) {
- ret = -ENOMEM;
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
goto failed;
}
+ if (list_empty(&NILFS_I(inode)->i_dirty)) {
+ /*
+ * Add the inode to GC inode list. Garbage Collection
+ * is serialized and no two processes manipulate the
+ * list simultaneously.
+ */
+ igrab(inode);
+ list_add(&NILFS_I(inode)->i_dirty,
+ &nilfs->ns_gc_inodes);
+ }
+
do {
ret = nilfs_ioctl_move_inode_block(inode, vdesc,
&buffers);
@@ -409,7 +420,7 @@ static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs,
size_t nmembs = argv->v_nmembs;
int ret;
- ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs);
+ ret = nilfs_dat_freev(nilfs->ns_dat, buf, nmembs);
return (ret < 0) ? ret : nmembs;
}
@@ -418,8 +429,7 @@ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs,
struct nilfs_argv *argv, void *buf)
{
size_t nmembs = argv->v_nmembs;
- struct inode *dat = nilfs_dat_inode(nilfs);
- struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap;
+ struct nilfs_bmap *bmap = NILFS_I(nilfs->ns_dat)->i_bmap;
struct nilfs_bdesc *bdescs = buf;
int ret, i;
@@ -438,7 +448,7 @@ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs,
/* skip dead block */
continue;
if (bdescs[i].bd_level == 0) {
- ret = nilfs_mdt_mark_block_dirty(dat,
+ ret = nilfs_mdt_mark_block_dirty(nilfs->ns_dat,
bdescs[i].bd_offset);
if (ret < 0) {
WARN_ON(ret == -ENOENT);
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 39a5b84..6a0e2a1 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -237,8 +237,6 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
*
* %-ENOENT - the specified block does not exist (hole block)
*
- * %-EINVAL - bmap is broken. (the caller should call nilfs_error())
- *
* %-EROFS - Read only filesystem (for create mode)
*/
int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
@@ -273,8 +271,6 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
* %-ENOMEM - Insufficient memory available.
*
* %-EIO - I/O error
- *
- * %-EINVAL - bmap is broken. (the caller should call nilfs_error())
*/
int nilfs_mdt_delete_block(struct inode *inode, unsigned long block)
{
@@ -350,8 +346,6 @@ int nilfs_mdt_forget_block(struct inode *inode, unsigned long block)
* %-EIO - I/O error
*
* %-ENOENT - the specified block does not exist (hole block)
- *
- * %-EINVAL - bmap is broken. (the caller should call nilfs_error())
*/
int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block)
{
@@ -499,31 +493,29 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh)
struct buffer_head *bh_frozen;
struct page *page;
int blkbits = inode->i_blkbits;
- int ret = -ENOMEM;
page = grab_cache_page(&shadow->frozen_data, bh->b_page->index);
if (!page)
- return ret;
+ return -ENOMEM;
if (!page_has_buffers(page))
create_empty_buffers(page, 1 << blkbits, 0);
bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits);
- if (bh_frozen) {
- if (!buffer_uptodate(bh_frozen))
- nilfs_copy_buffer(bh_frozen, bh);
- if (list_empty(&bh_frozen->b_assoc_buffers)) {
- list_add_tail(&bh_frozen->b_assoc_buffers,
- &shadow->frozen_buffers);
- set_buffer_nilfs_redirected(bh);
- } else {
- brelse(bh_frozen); /* already frozen */
- }
- ret = 0;
+
+ if (!buffer_uptodate(bh_frozen))
+ nilfs_copy_buffer(bh_frozen, bh);
+ if (list_empty(&bh_frozen->b_assoc_buffers)) {
+ list_add_tail(&bh_frozen->b_assoc_buffers,
+ &shadow->frozen_buffers);
+ set_buffer_nilfs_redirected(bh);
+ } else {
+ brelse(bh_frozen); /* already frozen */
}
+
unlock_page(page);
page_cache_release(page);
- return ret;
+ return 0;
}
struct buffer_head *
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 6e9557e..9803427 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -577,6 +577,7 @@ const struct inode_operations nilfs_dir_inode_operations = {
.rename = nilfs_rename,
.setattr = nilfs_setattr,
.permission = nilfs_permission,
+ .fiemap = nilfs_fiemap,
};
const struct inode_operations nilfs_special_inode_operations = {
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index f7560da..777e8fd 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -190,11 +190,6 @@ static inline int nilfs_doing_construction(void)
return nilfs_test_transaction_flag(NILFS_TI_WRITER);
}
-static inline struct inode *nilfs_dat_inode(const struct the_nilfs *nilfs)
-{
- return nilfs->ns_dat;
-}
-
/*
* function prototype
*/
@@ -256,14 +251,14 @@ extern void nilfs_update_inode(struct inode *, struct buffer_head *);
extern void nilfs_truncate(struct inode *);
extern void nilfs_evict_inode(struct inode *);
extern int nilfs_setattr(struct dentry *, struct iattr *);
-int nilfs_permission(struct inode *inode, int mask);
-extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *,
- struct buffer_head **);
+int nilfs_permission(struct inode *inode, int mask, unsigned int flags);
+int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh);
extern int nilfs_inode_dirty(struct inode *);
-extern int nilfs_set_file_dirty(struct nilfs_sb_info *, struct inode *,
- unsigned);
+int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty);
extern int nilfs_mark_inode_dirty(struct inode *);
extern void nilfs_dirty_inode(struct inode *);
+int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ __u64 start, __u64 len);
/* super.c */
extern struct inode *nilfs_alloc_inode(struct super_block *);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index a6c3c2e8..0c43241 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -491,7 +491,7 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
}
return nc;
}
-
+
void nilfs_mapping_init_once(struct address_space *mapping)
{
memset(mapping, 0, sizeof(*mapping));
@@ -546,3 +546,87 @@ int __nilfs_clear_page_dirty(struct page *page)
}
return TestClearPageDirty(page);
}
+
+/**
+ * nilfs_find_uncommitted_extent - find extent of uncommitted data
+ * @inode: inode
+ * @start_blk: start block offset (in)
+ * @blkoff: start offset of the found extent (out)
+ *
+ * This function searches an extent of buffers marked "delayed" which
+ * starts from a block offset equal to or larger than @start_blk. If
+ * such an extent was found, this will store the start offset in
+ * @blkoff and return its length in blocks. Otherwise, zero is
+ * returned.
+ */
+unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
+ sector_t start_blk,
+ sector_t *blkoff)
+{
+ unsigned int i;
+ pgoff_t index;
+ unsigned int nblocks_in_page;
+ unsigned long length = 0;
+ sector_t b;
+ struct pagevec pvec;
+ struct page *page;
+
+ if (inode->i_mapping->nrpages == 0)
+ return 0;
+
+ index = start_blk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ nblocks_in_page = 1U << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+ pagevec_init(&pvec, 0);
+
+repeat:
+ pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE,
+ pvec.pages);
+ if (pvec.nr == 0)
+ return length;
+
+ if (length > 0 && pvec.pages[0]->index > index)
+ goto out;
+
+ b = pvec.pages[0]->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ i = 0;
+ do {
+ page = pvec.pages[i];
+
+ lock_page(page);
+ if (page_has_buffers(page)) {
+ struct buffer_head *bh, *head;
+
+ bh = head = page_buffers(page);
+ do {
+ if (b < start_blk)
+ continue;
+ if (buffer_delay(bh)) {
+ if (length == 0)
+ *blkoff = b;
+ length++;
+ } else if (length > 0) {
+ goto out_locked;
+ }
+ } while (++b, bh = bh->b_this_page, bh != head);
+ } else {
+ if (length > 0)
+ goto out_locked;
+
+ b += nblocks_in_page;
+ }
+ unlock_page(page);
+
+ } while (++i < pagevec_count(&pvec));
+
+ index = page->index + 1;
+ pagevec_release(&pvec);
+ cond_resched();
+ goto repeat;
+
+out_locked:
+ unlock_page(page);
+out:
+ pagevec_release(&pvec);
+ return length;
+}
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index fb9e8a8..622df27 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -66,6 +66,9 @@ void nilfs_mapping_init(struct address_space *mapping,
struct backing_dev_info *bdi,
const struct address_space_operations *aops);
unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
+unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
+ sector_t start_blk,
+ sector_t *blkoff);
#define NILFS_PAGE_BUG(page, m, a...) \
do { nilfs_page_bug(page); BUG(); } while (0)
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 5d2711c2..3dfcd3b 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -535,7 +535,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
if (unlikely(err))
goto failed_page;
- err = nilfs_set_file_dirty(sbi, inode, 1);
+ err = nilfs_set_file_dirty(inode, 1);
if (unlikely(err))
goto failed_page;
diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h
index 35a0715..7a17715 100644
--- a/fs/nilfs2/sb.h
+++ b/fs/nilfs2/sb.h
@@ -27,14 +27,6 @@
#include <linux/types.h>
#include <linux/fs.h>
-/*
- * Mount options
- */
-struct nilfs_mount_options {
- unsigned long mount_opt;
- __u64 snapshot_cno;
-};
-
struct the_nilfs;
struct nilfs_sc_info;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 687d090..55ebae5 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -504,17 +504,6 @@ static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
return err;
}
-static int nilfs_handle_bmap_error(int err, const char *fname,
- struct inode *inode, struct super_block *sb)
-{
- if (err == -EINVAL) {
- nilfs_error(sb, fname, "broken bmap (inode=%lu)\n",
- inode->i_ino);
- err = -EIO;
- }
- return err;
-}
-
/*
* Callback functions that enumerate, mark, and collect dirty blocks
*/
@@ -524,9 +513,8 @@ static int nilfs_collect_file_data(struct nilfs_sc_info *sci,
int err;
err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
- if (unlikely(err < 0))
- return nilfs_handle_bmap_error(err, __func__, inode,
- sci->sc_super);
+ if (err < 0)
+ return err;
err = nilfs_segctor_add_file_block(sci, bh, inode,
sizeof(struct nilfs_binfo_v));
@@ -539,13 +527,7 @@ static int nilfs_collect_file_node(struct nilfs_sc_info *sci,
struct buffer_head *bh,
struct inode *inode)
{
- int err;
-
- err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
- if (unlikely(err < 0))
- return nilfs_handle_bmap_error(err, __func__, inode,
- sci->sc_super);
- return 0;
+ return nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
}
static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci,
@@ -588,9 +570,8 @@ static int nilfs_collect_dat_data(struct nilfs_sc_info *sci,
int err;
err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
- if (unlikely(err < 0))
- return nilfs_handle_bmap_error(err, __func__, inode,
- sci->sc_super);
+ if (err < 0)
+ return err;
err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
if (!err)
@@ -776,9 +757,8 @@ static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs,
ret++;
if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile))
ret++;
- if (ret || nilfs_doing_gc())
- if (nilfs_mdt_fetch_dirty(nilfs_dat_inode(nilfs)))
- ret++;
+ if ((ret || nilfs_doing_gc()) && nilfs_mdt_fetch_dirty(nilfs->ns_dat))
+ ret++;
return ret;
}
@@ -814,7 +794,7 @@ static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
nilfs_mdt_clear_dirty(sci->sc_root->ifile);
nilfs_mdt_clear_dirty(nilfs->ns_cpfile);
nilfs_mdt_clear_dirty(nilfs->ns_sufile);
- nilfs_mdt_clear_dirty(nilfs_dat_inode(nilfs));
+ nilfs_mdt_clear_dirty(nilfs->ns_dat);
}
static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
@@ -923,7 +903,7 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
raw_sr->sr_flags = 0;
- nilfs_write_inode_common(nilfs_dat_inode(nilfs), (void *)raw_sr +
+ nilfs_write_inode_common(nilfs->ns_dat, (void *)raw_sr +
NILFS_SR_DAT_OFFSET(isz), 1);
nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr +
NILFS_SR_CPFILE_OFFSET(isz), 1);
@@ -1179,7 +1159,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
sci->sc_stage.scnt++; /* Fall through */
case NILFS_ST_DAT:
dat_stage:
- err = nilfs_segctor_scan_file(sci, nilfs_dat_inode(nilfs),
+ err = nilfs_segctor_scan_file(sci, nilfs->ns_dat,
&nilfs_sc_dat_ops);
if (unlikely(err))
break;
@@ -1563,7 +1543,6 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
return 0;
failed_bmap:
- err = nilfs_handle_bmap_error(err, __func__, inode, sci->sc_super);
return err;
}
@@ -1783,6 +1762,7 @@ static void nilfs_clear_copied_buffers(struct list_head *list, int err)
if (!err) {
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
+ clear_buffer_delay(bh);
clear_buffer_nilfs_volatile(bh);
}
brelse(bh); /* for b_assoc_buffers */
@@ -1909,6 +1889,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
b_assoc_buffers) {
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
+ clear_buffer_delay(bh);
clear_buffer_nilfs_volatile(bh);
clear_buffer_nilfs_redirected(bh);
if (bh == segbuf->sb_super_root) {
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 35ae03c..70dfdd5 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -47,7 +47,6 @@
#include <linux/crc32.h>
#include <linux/vfs.h>
#include <linux/writeback.h>
-#include <linux/kobject.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
#include "nilfs.h"
@@ -111,12 +110,17 @@ void nilfs_error(struct super_block *sb, const char *function,
const char *fmt, ...)
{
struct nilfs_sb_info *sbi = NILFS_SB(sb);
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_CRIT "NILFS error (device %s): %s: ", sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_CRIT "NILFS error (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
if (!(sb->s_flags & MS_RDONLY)) {
@@ -136,13 +140,17 @@ void nilfs_error(struct super_block *sb, const char *function,
void nilfs_warning(struct super_block *sb, const char *function,
const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_WARNING "NILFS warning (device %s): %s: ",
- sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_WARNING "NILFS warning (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
}
@@ -162,10 +170,13 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
return &ii->vfs_inode;
}
-void nilfs_destroy_inode(struct inode *inode)
+static void nilfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
+ INIT_LIST_HEAD(&inode->i_dentry);
+
if (mdi) {
kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
kfree(mdi);
@@ -173,6 +184,11 @@ void nilfs_destroy_inode(struct inode *inode)
kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
}
+void nilfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, nilfs_i_callback);
+}
+
static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
{
struct the_nilfs *nilfs = sbi->s_nilfs;
@@ -838,7 +854,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
static int nilfs_tree_was_touched(struct dentry *root_dentry)
{
- return atomic_read(&root_dentry->d_count) > 1;
+ return root_dentry->d_count > 1;
}
/**
@@ -1002,11 +1018,11 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
struct nilfs_sb_info *sbi = NILFS_SB(sb);
struct the_nilfs *nilfs = sbi->s_nilfs;
unsigned long old_sb_flags;
- struct nilfs_mount_options old_opts;
+ unsigned long old_mount_opt;
int err;
old_sb_flags = sb->s_flags;
- old_opts.mount_opt = sbi->s_mount_opt;
+ old_mount_opt = sbi->s_mount_opt;
if (!parse_options(data, sb, 1)) {
err = -EINVAL;
@@ -1075,7 +1091,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
restore_opts:
sb->s_flags = old_sb_flags;
- sbi->s_mount_opt = old_opts.mount_opt;
+ sbi->s_mount_opt = old_mount_opt;
return err;
}
@@ -1141,9 +1157,9 @@ static int nilfs_test_bdev_super(struct super_block *s, void *data)
return (void *)s->s_bdev == data;
}
-static int
-nilfs_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *
+nilfs_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
{
struct nilfs_super_data sd;
struct super_block *s;
@@ -1156,7 +1172,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type);
if (IS_ERR(sd.bdev))
- return PTR_ERR(sd.bdev);
+ return ERR_CAST(sd.bdev);
sd.cno = 0;
sd.flags = flags;
@@ -1235,9 +1251,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
if (!s_new)
close_bdev_exclusive(sd.bdev, mode);
- mnt->mnt_sb = s;
- mnt->mnt_root = root_dentry;
- return 0;
+ return root_dentry;
failed_super:
deactivate_locked_super(s);
@@ -1245,13 +1259,13 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
failed:
if (!s_new)
close_bdev_exclusive(sd.bdev, mode);
- return err;
+ return ERR_PTR(err);
}
struct file_system_type nilfs_fs_type = {
.owner = THIS_MODULE,
.name = "nilfs2",
- .get_sb = nilfs_get_sb,
+ .mount = nilfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 0254be2..ad4ac60 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -329,7 +329,6 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
printk(KERN_INFO "NILFS: recovery complete.\n");
skip_recovery:
- set_nilfs_loaded(nilfs);
nilfs_clear_recovery_info(&ri);
sbi->s_super->s_flags = s_flags;
return 0;
@@ -651,12 +650,11 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump,
int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks)
{
- struct inode *dat = nilfs_dat_inode(nilfs);
unsigned long ncleansegs;
- down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
+ down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
- up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
+ up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
*nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment;
return 0;
}
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 69226e1..fd85e4c 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -36,8 +36,6 @@
/* the_nilfs struct */
enum {
THE_NILFS_INIT = 0, /* Information from super_block is set */
- THE_NILFS_LOADED, /* Roll-back/roll-forward has done and
- the latest checkpoint was loaded */
THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */
THE_NILFS_GC_RUNNING, /* gc process is running */
THE_NILFS_SB_DIRTY, /* super block is dirty */
@@ -178,7 +176,6 @@ static inline int nilfs_##name(struct the_nilfs *nilfs) \
}
THE_NILFS_FNS(INIT, init)
-THE_NILFS_FNS(LOADED, loaded)
THE_NILFS_FNS(DISCONTINUED, discontinued)
THE_NILFS_FNS(GC_RUNNING, gc_running)
THE_NILFS_FNS(SB_DIRTY, sb_dirty)
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index b388443..22c629e 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -3,4 +3,4 @@ config FSNOTIFY
source "fs/notify/dnotify/Kconfig"
source "fs/notify/inotify/Kconfig"
-#source "fs/notify/fanotify/Kconfig"
+source "fs/notify/fanotify/Kconfig"
diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig
index 3ac36b7b..7dceff0 100644
--- a/fs/notify/fanotify/Kconfig
+++ b/fs/notify/fanotify/Kconfig
@@ -6,7 +6,7 @@ config FANOTIFY
---help---
Say Y here to enable fanotify suport. fanotify is a file access
notification system which differs from inotify in that it sends
- and open file descriptor to the userspace listener along with
+ an open file descriptor to the userspace listener along with
the event.
If unsure, say Y.
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 85366c7..f35794b 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -92,7 +92,11 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
- wait_event(group->fanotify_data.access_waitq, event->response);
+ wait_event(group->fanotify_data.access_waitq, event->response ||
+ atomic_read(&group->fanotify_data.bypass_perm));
+
+ if (!event->response) /* bypass_perm set */
+ return 0;
/* userspace responded, convert to something usable */
spin_lock(&event->lock);
@@ -131,6 +135,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
+ BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
@@ -160,20 +165,21 @@ static bool fanotify_should_send_event(struct fsnotify_group *group,
__u32 event_mask, void *data, int data_type)
{
__u32 marks_mask, marks_ignored_mask;
+ struct path *path = data;
pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p "
"mask=%x data=%p data_type=%d\n", __func__, group, to_tell,
inode_mark, vfsmnt_mark, event_mask, data, data_type);
- /* sorry, fanotify only gives a damn about files and dirs */
- if (!S_ISREG(to_tell->i_mode) &&
- !S_ISDIR(to_tell->i_mode))
- return false;
-
/* if we don't have enough info to send an event to userspace say no */
if (data_type != FSNOTIFY_EVENT_PATH)
return false;
+ /* sorry, fanotify only gives a damn about files and dirs */
+ if (!S_ISREG(path->dentry->d_inode->i_mode) &&
+ !S_ISDIR(path->dentry->d_inode->i_mode))
+ return false;
+
if (inode_mark && vfsmnt_mark) {
marks_mask = (vfsmnt_mark->mask | inode_mark->mask);
marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask);
@@ -194,16 +200,29 @@ static bool fanotify_should_send_event(struct fsnotify_group *group,
BUG();
}
+ if (S_ISDIR(path->dentry->d_inode->i_mode) &&
+ (marks_ignored_mask & FS_ISDIR))
+ return false;
+
if (event_mask & marks_mask & ~marks_ignored_mask)
return true;
return false;
}
+static void fanotify_free_group_priv(struct fsnotify_group *group)
+{
+ struct user_struct *user;
+
+ user = group->fanotify_data.user;
+ atomic_dec(&user->fanotify_listeners);
+ free_uid(user);
+}
+
const struct fsnotify_ops fanotify_fsnotify_ops = {
.handle_event = fanotify_handle_event,
.should_send_event = fanotify_should_send_event,
- .free_group_priv = NULL,
+ .free_group_priv = fanotify_free_group_priv,
.free_event_priv = NULL,
.freeing_mark = NULL,
};
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index bbcb98e..8b61220 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -16,6 +16,10 @@
#include <asm/ioctls.h>
+#define FANOTIFY_DEFAULT_MAX_EVENTS 16384
+#define FANOTIFY_DEFAULT_MAX_MARKS 8192
+#define FANOTIFY_DEFAULT_MAX_LISTENERS 128
+
extern const struct fsnotify_ops fanotify_fsnotify_ops;
static struct kmem_cache *fanotify_mark_cache __read_mostly;
@@ -102,20 +106,29 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
return client_fd;
}
-static ssize_t fill_event_metadata(struct fsnotify_group *group,
+static int fill_event_metadata(struct fsnotify_group *group,
struct fanotify_event_metadata *metadata,
struct fsnotify_event *event)
{
+ int ret = 0;
+
pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
group, metadata, event);
metadata->event_len = FAN_EVENT_METADATA_LEN;
+ metadata->metadata_len = FAN_EVENT_METADATA_LEN;
metadata->vers = FANOTIFY_METADATA_VERSION;
metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS;
metadata->pid = pid_vnr(event->tgid);
- metadata->fd = create_fd(group, event);
+ if (unlikely(event->mask & FAN_Q_OVERFLOW))
+ metadata->fd = FAN_NOFD;
+ else {
+ metadata->fd = create_fd(group, event);
+ if (metadata->fd < 0)
+ ret = metadata->fd;
+ }
- return metadata->fd;
+ return ret;
}
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
@@ -196,7 +209,7 @@ static int prepare_for_access_response(struct fsnotify_group *group,
mutex_lock(&group->fanotify_data.access_mutex);
- if (group->fanotify_data.bypass_perm) {
+ if (atomic_read(&group->fanotify_data.bypass_perm)) {
mutex_unlock(&group->fanotify_data.access_mutex);
kmem_cache_free(fanotify_response_event_cache, re);
event->response = FAN_ALLOW;
@@ -253,24 +266,34 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
- fd = fill_event_metadata(group, &fanotify_event_metadata, event);
- if (fd < 0)
- return fd;
+ ret = fill_event_metadata(group, &fanotify_event_metadata, event);
+ if (ret < 0)
+ goto out;
+ fd = fanotify_event_metadata.fd;
ret = prepare_for_access_response(group, event, fd);
if (ret)
goto out_close_fd;
ret = -EFAULT;
- if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN))
+ if (copy_to_user(buf, &fanotify_event_metadata,
+ fanotify_event_metadata.event_len))
goto out_kill_access_response;
- return FAN_EVENT_METADATA_LEN;
+ return fanotify_event_metadata.event_len;
out_kill_access_response:
remove_access_response(group, event, fd);
out_close_fd:
- sys_close(fd);
+ if (fd != FAN_NOFD)
+ sys_close(fd);
+out:
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+ if (event->mask & FAN_ALL_PERM_EVENTS) {
+ event->response = FAN_DENY;
+ wake_up(&group->fanotify_data.access_waitq);
+ }
+#endif
return ret;
}
@@ -326,7 +349,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
ret = -EAGAIN;
if (file->f_flags & O_NONBLOCK)
break;
- ret = -EINTR;
+ ret = -ERESTARTSYS;
if (signal_pending(current))
break;
@@ -372,14 +395,13 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t
static int fanotify_release(struct inode *ignored, struct file *file)
{
struct fsnotify_group *group = file->private_data;
- struct fanotify_response_event *re, *lre;
-
- pr_debug("%s: file=%p group=%p\n", __func__, file, group);
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+ struct fanotify_response_event *re, *lre;
+
mutex_lock(&group->fanotify_data.access_mutex);
- group->fanotify_data.bypass_perm = true;
+ atomic_inc(&group->fanotify_data.bypass_perm);
list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) {
pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group,
@@ -554,18 +576,24 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
__u32 mask,
unsigned int flags)
{
- __u32 oldmask;
+ __u32 oldmask = -1;
spin_lock(&fsn_mark->lock);
if (!(flags & FAN_MARK_IGNORED_MASK)) {
oldmask = fsn_mark->mask;
fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask));
} else {
- oldmask = fsn_mark->ignored_mask;
- fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask | mask));
+ __u32 tmask = fsn_mark->ignored_mask | mask;
+ fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
}
+
+ if (!(flags & FAN_MARK_ONDIR)) {
+ __u32 tmask = fsn_mark->ignored_mask | FAN_ONDIR;
+ fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
+ }
+
spin_unlock(&fsn_mark->lock);
return mask & ~oldmask;
@@ -577,10 +605,12 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
{
struct fsnotify_mark *fsn_mark;
__u32 added;
+ int ret = 0;
fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
if (!fsn_mark) {
- int ret;
+ if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
+ return -ENOSPC;
fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
if (!fsn_mark)
@@ -588,17 +618,16 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
fsnotify_init_mark(fsn_mark, fanotify_free_mark);
ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0);
- if (ret) {
- fanotify_free_mark(fsn_mark);
- return ret;
- }
+ if (ret)
+ goto err;
}
added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
- fsnotify_put_mark(fsn_mark);
+
if (added & ~mnt->mnt_fsnotify_mask)
fsnotify_recalc_vfsmount_mask(mnt);
-
- return 0;
+err:
+ fsnotify_put_mark(fsn_mark);
+ return ret;
}
static int fanotify_add_inode_mark(struct fsnotify_group *group,
@@ -607,12 +636,24 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
{
struct fsnotify_mark *fsn_mark;
__u32 added;
+ int ret = 0;
pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
+ /*
+ * If some other task has this inode open for write we should not add
+ * an ignored mark, unless that ignored mark is supposed to survive
+ * modification changes anyway.
+ */
+ if ((flags & FAN_MARK_IGNORED_MASK) &&
+ !(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
+ (atomic_read(&inode->i_writecount) > 0))
+ return 0;
+
fsn_mark = fsnotify_find_inode_mark(group, inode);
if (!fsn_mark) {
- int ret;
+ if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
+ return -ENOSPC;
fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
if (!fsn_mark)
@@ -620,16 +661,16 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
fsnotify_init_mark(fsn_mark, fanotify_free_mark);
ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0);
- if (ret) {
- fanotify_free_mark(fsn_mark);
- return ret;
- }
+ if (ret)
+ goto err;
}
added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
- fsnotify_put_mark(fsn_mark);
+
if (added & ~inode->i_fsnotify_mask)
fsnotify_recalc_inode_mask(inode);
- return 0;
+err:
+ fsnotify_put_mark(fsn_mark);
+ return ret;
}
/* fanotify syscalls */
@@ -637,6 +678,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
{
struct fsnotify_group *group;
int f_flags, fd;
+ struct user_struct *user;
pr_debug("%s: flags=%d event_f_flags=%d\n",
__func__, flags, event_f_flags);
@@ -647,6 +689,12 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
if (flags & ~FAN_ALL_INIT_FLAGS)
return -EINVAL;
+ user = get_current_user();
+ if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) {
+ free_uid(user);
+ return -EMFILE;
+ }
+
f_flags = O_RDWR | FMODE_NONOTIFY;
if (flags & FAN_CLOEXEC)
f_flags |= O_CLOEXEC;
@@ -655,15 +703,53 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
/* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */
group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
- if (IS_ERR(group))
+ if (IS_ERR(group)) {
+ free_uid(user);
return PTR_ERR(group);
+ }
+
+ group->fanotify_data.user = user;
+ atomic_inc(&user->fanotify_listeners);
group->fanotify_data.f_flags = event_f_flags;
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
mutex_init(&group->fanotify_data.access_mutex);
init_waitqueue_head(&group->fanotify_data.access_waitq);
INIT_LIST_HEAD(&group->fanotify_data.access_list);
+ atomic_set(&group->fanotify_data.bypass_perm, 0);
#endif
+ switch (flags & FAN_ALL_CLASS_BITS) {
+ case FAN_CLASS_NOTIF:
+ group->priority = FS_PRIO_0;
+ break;
+ case FAN_CLASS_CONTENT:
+ group->priority = FS_PRIO_1;
+ break;
+ case FAN_CLASS_PRE_CONTENT:
+ group->priority = FS_PRIO_2;
+ break;
+ default:
+ fd = -EINVAL;
+ goto out_put_group;
+ }
+
+ if (flags & FAN_UNLIMITED_QUEUE) {
+ fd = -EPERM;
+ if (!capable(CAP_SYS_ADMIN))
+ goto out_put_group;
+ group->max_events = UINT_MAX;
+ } else {
+ group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS;
+ }
+
+ if (flags & FAN_UNLIMITED_MARKS) {
+ fd = -EPERM;
+ if (!capable(CAP_SYS_ADMIN))
+ goto out_put_group;
+ group->fanotify_data.max_marks = UINT_MAX;
+ } else {
+ group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS;
+ }
fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
if (fd < 0)
@@ -697,13 +783,21 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
if (flags & ~FAN_ALL_MARK_FLAGS)
return -EINVAL;
switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
- case FAN_MARK_ADD:
+ case FAN_MARK_ADD: /* fallthrough */
case FAN_MARK_REMOVE:
+ if (!mask)
+ return -EINVAL;
case FAN_MARK_FLUSH:
break;
default:
return -EINVAL;
}
+
+ if (mask & FAN_ONDIR) {
+ flags |= FAN_MARK_ONDIR;
+ mask &= ~FAN_ONDIR;
+ }
+
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD))
#else
@@ -719,6 +813,16 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
ret = -EINVAL;
if (unlikely(filp->f_op != &fanotify_fops))
goto fput_and_out;
+ group = filp->private_data;
+
+ /*
+ * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not
+ * allowed to set permissions events.
+ */
+ ret = -EINVAL;
+ if (mask & FAN_ALL_PERM_EVENTS &&
+ group->priority == FS_PRIO_0)
+ goto fput_and_out;
ret = fanotify_find_path(dfd, pathname, &path, flags);
if (ret)
@@ -729,7 +833,6 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
inode = path.dentry->d_inode;
else
mnt = path.mnt;
- group = filp->private_data;
/* create/update an inode mark */
switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 4498a20..79b47cb 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -59,7 +59,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
/* determine if the children should tell inode about their events */
watched = fsnotify_inode_watches_children(inode);
- spin_lock(&dcache_lock);
+ spin_lock(&inode->i_lock);
/* run all of the dentries associated with this inode. Since this is a
* directory, there damn well better only be one item on this list */
list_for_each_entry(alias, &inode->i_dentry, d_alias) {
@@ -68,32 +68,35 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
/* run all of the children of the original inode and fix their
* d_flags to indicate parental interest (their parent is the
* original inode) */
+ spin_lock(&alias->d_lock);
list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
if (!child->d_inode)
continue;
- spin_lock(&child->d_lock);
+ spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
if (watched)
child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
else
child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
spin_unlock(&child->d_lock);
}
+ spin_unlock(&alias->d_lock);
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
}
/* Notify this dentry's parent about a child's events. */
-void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
+int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
{
struct dentry *parent;
struct inode *p_inode;
+ int ret = 0;
if (!dentry)
dentry = path->dentry;
if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
- return;
+ return 0;
parent = dget_parent(dentry);
p_inode = parent->d_inode;
@@ -106,14 +109,16 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
mask |= FS_EVENT_ON_CHILD;
if (path)
- fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
- dentry->d_name.name, 0);
+ ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
+ dentry->d_name.name, 0);
else
- fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
- dentry->d_name.name, 0);
+ ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
+ dentry->d_name.name, 0);
}
dput(parent);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(__fsnotify_parent);
@@ -252,20 +257,23 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
if (inode_group > vfsmount_group) {
/* handle inode */
- send_to_group(to_tell, NULL, inode_mark, NULL, mask, data,
- data_is, cookie, file_name, &event);
+ ret = send_to_group(to_tell, NULL, inode_mark, NULL, mask, data,
+ data_is, cookie, file_name, &event);
/* we didn't use the vfsmount_mark */
vfsmount_group = NULL;
} else if (vfsmount_group > inode_group) {
- send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data,
- data_is, cookie, file_name, &event);
+ ret = send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data,
+ data_is, cookie, file_name, &event);
inode_group = NULL;
} else {
- send_to_group(to_tell, mnt, inode_mark, vfsmount_mark,
- mask, data, data_is, cookie, file_name,
- &event);
+ ret = send_to_group(to_tell, mnt, inode_mark, vfsmount_mark,
+ mask, data, data_is, cookie, file_name,
+ &event);
}
+ if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
+ goto out;
+
if (inode_group)
inode_node = srcu_dereference(inode_node->next,
&fsnotify_mark_srcu);
@@ -273,7 +281,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
vfsmount_node = srcu_dereference(vfsmount_node->next,
&fsnotify_mark_srcu);
}
-
+ ret = 0;
+out:
srcu_read_unlock(&fsnotify_mark_srcu, idx);
/*
* fsnotify_create_event() took a reference so the event can't be cleaned
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 21ed106..4c29fcf 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -177,7 +177,8 @@ void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark,
* Attach an initialized mark to a given inode.
* These marks may be used for the fsnotify backend to determine which
* event types should be delivered to which group and for which inodes. These
- * marks are ordered according to the group's location in memory.
+ * marks are ordered according to priority, highest number first, and then by
+ * the group's location in memory.
*/
int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group, struct inode *inode,
@@ -211,7 +212,11 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
goto out;
}
- if (mark->group < lmark->group)
+ if (mark->group->priority < lmark->group->priority)
+ continue;
+
+ if ((mark->group->priority == lmark->group->priority) &&
+ (mark->group < lmark->group))
continue;
hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 24edc11..4cd5d5d 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -752,6 +752,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
if (ret >= 0)
return ret;
+ fsnotify_put_group(group);
atomic_dec(&user->inotify_devs);
out_free_uid:
free_uid(user);
@@ -862,7 +863,7 @@ static int __init inotify_user_setup(void)
BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
- BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
+ BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 56772b5..85eebff 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -169,7 +169,11 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
goto out;
}
- if (mark->group < lmark->group)
+ if (mark->group->priority < lmark->group->priority)
+ continue;
+
+ if ((mark->group->priority == lmark->group->priority) &&
+ (mark->group < lmark->group))
continue;
hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 58b6be9..4ff028f 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
unistr.o upcase.o
-EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.29\"
+EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.30\"
ifeq ($(CONFIG_NTFS_DEBUG),y)
EXTRA_CFLAGS += -DDEBUG
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 113ebd9..f4b1057 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1,7 +1,7 @@
/*
* file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2007 Anton Altaparmakov
+ * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc.
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -1380,15 +1380,14 @@ static inline void ntfs_set_next_iovec(const struct iovec **iovp,
* pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s
* single-segment behaviour.
*
- * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both
- * when atomic and when not atomic. This is ok because
- * __ntfs_copy_from_user_iovec_inatomic() calls __copy_from_user_inatomic()
- * and it is ok to call this when non-atomic.
- * Infact, the only difference between __copy_from_user_inatomic() and
+ * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when
+ * atomic and when not atomic. This is ok because it calls
+ * __copy_from_user_inatomic() and it is ok to call this when non-atomic. In
+ * fact, the only difference between __copy_from_user_inatomic() and
* __copy_from_user() is that the latter calls might_sleep() and the former
- * should not zero the tail of the buffer on error. And on many
- * architectures __copy_from_user_inatomic() is just defined to
- * __copy_from_user() so it makes no difference at all on those architectures.
+ * should not zero the tail of the buffer on error. And on many architectures
+ * __copy_from_user_inatomic() is just defined to __copy_from_user() so it
+ * makes no difference at all on those architectures.
*/
static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
unsigned nr_pages, unsigned ofs, const struct iovec **iov,
@@ -1409,28 +1408,28 @@ static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
if (unlikely(copied != len)) {
/* Do it the slow way. */
addr = kmap(*pages);
- copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
- *iov, *iov_ofs, len);
- /*
- * Zero the rest of the target like __copy_from_user().
- */
- memset(addr + ofs + copied, 0, len - copied);
- kunmap(*pages);
+ copied = __ntfs_copy_from_user_iovec_inatomic(addr +
+ ofs, *iov, *iov_ofs, len);
if (unlikely(copied != len))
goto err_out;
+ kunmap(*pages);
}
total += len;
+ ntfs_set_next_iovec(iov, iov_ofs, len);
bytes -= len;
if (!bytes)
break;
- ntfs_set_next_iovec(iov, iov_ofs, len);
ofs = 0;
} while (++pages < last_page);
out:
return total;
err_out:
- total += copied;
+ BUG_ON(copied > len);
/* Zero the rest of the target like __copy_from_user(). */
+ memset(addr + ofs + copied, 0, len - copied);
+ kunmap(*pages);
+ total += copied;
+ ntfs_set_next_iovec(iov, iov_ofs, copied);
while (++pages < last_page) {
bytes -= len;
if (!bytes)
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 93622b1..a627ed8 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -332,6 +332,13 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
return NULL;
}
+static void ntfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
+}
+
void ntfs_destroy_big_inode(struct inode *inode)
{
ntfs_inode *ni = NTFS_I(inode);
@@ -340,7 +347,7 @@ void ntfs_destroy_big_inode(struct inode *inode)
BUG_ON(ni->page);
if (!atomic_dec_and_test(&ni->count))
BUG();
- kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
+ call_rcu(&inode->i_rcu, ntfs_i_callback);
}
static inline ntfs_inode *ntfs_alloc_extent_inode(void)
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index d3fbe57..29099a0 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1,7 +1,7 @@
/*
* super.c - NTFS kernel super block handling. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2007 Anton Altaparmakov
+ * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc.
* Copyright (c) 2001,2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -3059,17 +3059,16 @@ struct kmem_cache *ntfs_index_ctx_cache;
/* Driver wide mutex. */
DEFINE_MUTEX(ntfs_lock);
-static int ntfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ntfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, ntfs_fill_super);
}
static struct file_system_type ntfs_fs_type = {
.owner = THIS_MODULE,
.name = "ntfs",
- .get_sb = ntfs_get_sb,
+ .mount = ntfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
@@ -3194,8 +3193,8 @@ static void __exit exit_ntfs_fs(void)
ntfs_sysctl(0);
}
-MODULE_AUTHOR("Anton Altaparmakov <aia21@cantab.net>");
-MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2007 Anton Altaparmakov");
+MODULE_AUTHOR("Anton Altaparmakov <anton@tuxera.com>");
+MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc.");
MODULE_VERSION(NTFS_VERSION);
MODULE_LICENSE("GPL");
#ifdef DEBUG
diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig
index 0d84066..ab152c0 100644
--- a/fs/ocfs2/Kconfig
+++ b/fs/ocfs2/Kconfig
@@ -51,7 +51,7 @@ config OCFS2_FS_USERSPACE_CLUSTER
config OCFS2_FS_STATS
bool "OCFS2 statistics"
- depends on OCFS2_FS
+ depends on OCFS2_FS && DEBUG_FS
default y
help
This option allows some fs statistics to be captured. Enabling
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 3919150..704f6b1 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -291,13 +291,17 @@ static int ocfs2_set_acl(handle_t *handle,
return ret;
}
-int ocfs2_check_acl(struct inode *inode, int mask)
+int ocfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
{
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_super *osb;
struct buffer_head *di_bh = NULL;
struct posix_acl *acl;
int ret = -EAGAIN;
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
+ osb = OCFS2_SB(inode->i_sb);
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return ret;
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 5c5d31f..4fe7c9c 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -26,7 +26,7 @@ struct ocfs2_acl_entry {
__le32 e_id;
};
-extern int ocfs2_check_acl(struct inode *, int);
+extern int ocfs2_check_acl(struct inode *, int, unsigned int);
extern int ocfs2_acl_chmod(struct inode *);
extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
struct buffer_head *, struct buffer_head *,
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 592fae5..e4984e2 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -565,7 +565,6 @@ static inline int ocfs2_et_sanity_check(struct ocfs2_extent_tree *et)
return ret;
}
-static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
struct ocfs2_extent_block *eb);
static void ocfs2_adjust_rightmost_records(handle_t *handle,
@@ -5858,6 +5857,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
ocfs2_journal_dirty(handle, tl_bh);
+ osb->truncated_clusters += num_clusters;
bail:
mlog_exit(status);
return status;
@@ -5929,6 +5929,8 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
i--;
}
+ osb->truncated_clusters = 0;
+
bail:
mlog_exit(status);
return status;
@@ -7139,64 +7141,6 @@ bail:
}
/*
- * Expects the inode to already be locked.
- */
-int ocfs2_prepare_truncate(struct ocfs2_super *osb,
- struct inode *inode,
- struct buffer_head *fe_bh,
- struct ocfs2_truncate_context **tc)
-{
- int status;
- unsigned int new_i_clusters;
- struct ocfs2_dinode *fe;
- struct ocfs2_extent_block *eb;
- struct buffer_head *last_eb_bh = NULL;
-
- mlog_entry_void();
-
- *tc = NULL;
-
- new_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
- i_size_read(inode));
- fe = (struct ocfs2_dinode *) fe_bh->b_data;
-
- mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size ="
- "%llu\n", le32_to_cpu(fe->i_clusters), new_i_clusters,
- (unsigned long long)le64_to_cpu(fe->i_size));
-
- *tc = kzalloc(sizeof(struct ocfs2_truncate_context), GFP_KERNEL);
- if (!(*tc)) {
- status = -ENOMEM;
- mlog_errno(status);
- goto bail;
- }
- ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
-
- if (fe->id2.i_list.l_tree_depth) {
- status = ocfs2_read_extent_block(INODE_CACHE(inode),
- le64_to_cpu(fe->i_last_eb_blk),
- &last_eb_bh);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
- eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
- }
-
- (*tc)->tc_last_eb_bh = last_eb_bh;
-
- status = 0;
-bail:
- if (status < 0) {
- if (*tc)
- ocfs2_free_truncate_context(*tc);
- *tc = NULL;
- }
- mlog_exit_void();
- return status;
-}
-
-/*
* 'start' is inclusive, 'end' is not.
*/
int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
@@ -7270,18 +7214,3 @@ out_commit:
out:
return ret;
}
-
-static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
-{
- /*
- * The caller is responsible for completing deallocation
- * before freeing the context.
- */
- if (tc->tc_dealloc.c_first_suballocator != NULL)
- mlog(ML_NOTICE,
- "Truncate completion has non-empty dealloc context\n");
-
- brelse(tc->tc_last_eb_bh);
-
- kfree(tc);
-}
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 55762b5..3bd08a0 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -228,10 +228,6 @@ struct ocfs2_truncate_context {
int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
u64 range_start, u64 range_end);
-int ocfs2_prepare_truncate(struct ocfs2_super *osb,
- struct inode *inode,
- struct buffer_head *fe_bh,
- struct ocfs2_truncate_context **tc);
int ocfs2_commit_truncate(struct ocfs2_super *osb,
struct inode *inode,
struct buffer_head *di_bh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f1e962c..1fbb0e2 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -573,11 +573,14 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
/* this io's submitter should not have unlocked this before we could */
BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
+ if (ocfs2_iocb_is_sem_locked(iocb)) {
+ up_read(&inode->i_alloc_sem);
+ ocfs2_iocb_clear_sem_locked(iocb);
+ }
+
ocfs2_iocb_clear_rw_locked(iocb);
level = ocfs2_iocb_rw_locked_level(iocb);
- if (!level)
- up_read(&inode->i_alloc_sem);
ocfs2_rw_unlock(inode, level);
if (is_async)
@@ -1627,6 +1630,43 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
return ret;
}
+/*
+ * Try to flush truncate logs if we can free enough clusters from it.
+ * As for return value, "< 0" means error, "0" no space and "1" means
+ * we have freed enough spaces and let the caller try to allocate again.
+ */
+static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
+ unsigned int needed)
+{
+ tid_t target;
+ int ret = 0;
+ unsigned int truncated_clusters;
+
+ mutex_lock(&osb->osb_tl_inode->i_mutex);
+ truncated_clusters = osb->truncated_clusters;
+ mutex_unlock(&osb->osb_tl_inode->i_mutex);
+
+ /*
+ * Check whether we can succeed in allocating if we free
+ * the truncate log.
+ */
+ if (truncated_clusters < needed)
+ goto out;
+
+ ret = ocfs2_flush_truncate_log(osb);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
+ jbd2_log_wait_commit(osb->journal->j_journal, target);
+ ret = 1;
+ }
+out:
+ return ret;
+}
+
int ocfs2_write_begin_nolock(struct file *filp,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
@@ -1634,7 +1674,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
struct buffer_head *di_bh, struct page *mmap_page)
{
int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
- unsigned int clusters_to_alloc, extents_to_split;
+ unsigned int clusters_to_alloc, extents_to_split, clusters_need = 0;
struct ocfs2_write_ctxt *wc;
struct inode *inode = mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -1643,7 +1683,9 @@ int ocfs2_write_begin_nolock(struct file *filp,
struct ocfs2_alloc_context *meta_ac = NULL;
handle_t *handle;
struct ocfs2_extent_tree et;
+ int try_free = 1, ret1;
+try_again:
ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
if (ret) {
mlog_errno(ret);
@@ -1678,6 +1720,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
mlog_errno(ret);
goto out;
} else if (ret == 1) {
+ clusters_need = wc->w_clen;
ret = ocfs2_refcount_cow(inode, filp, di_bh,
wc->w_cpos, wc->w_clen, UINT_MAX);
if (ret) {
@@ -1692,6 +1735,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
mlog_errno(ret);
goto out;
}
+ clusters_need += clusters_to_alloc;
di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
@@ -1814,6 +1858,22 @@ out:
ocfs2_free_alloc_context(data_ac);
if (meta_ac)
ocfs2_free_alloc_context(meta_ac);
+
+ if (ret == -ENOSPC && try_free) {
+ /*
+ * Try to free some truncate log so that we can have enough
+ * clusters to allocate.
+ */
+ try_free = 0;
+
+ ret1 = ocfs2_try_to_free_truncate_log(osb, clusters_need);
+ if (ret1 == 1)
+ goto try_again;
+
+ if (ret1 < 0)
+ mlog_errno(ret1);
+ }
+
return ret;
}
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 76bfdfd..eceb456 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -68,8 +68,27 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
else
clear_bit(1, (unsigned long *)&iocb->private);
}
+
+/*
+ * Using a named enum representing lock types in terms of #N bit stored in
+ * iocb->private, which is going to be used for communication bewteen
+ * ocfs2_dio_end_io() and ocfs2_file_aio_write/read().
+ */
+enum ocfs2_iocb_lock_bits {
+ OCFS2_IOCB_RW_LOCK = 0,
+ OCFS2_IOCB_RW_LOCK_LEVEL,
+ OCFS2_IOCB_SEM,
+ OCFS2_IOCB_NUM_LOCKS
+};
+
#define ocfs2_iocb_clear_rw_locked(iocb) \
- clear_bit(0, (unsigned long *)&iocb->private)
+ clear_bit(OCFS2_IOCB_RW_LOCK, (unsigned long *)&iocb->private)
#define ocfs2_iocb_rw_locked_level(iocb) \
- test_bit(1, (unsigned long *)&iocb->private)
+ test_bit(OCFS2_IOCB_RW_LOCK_LEVEL, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_set_sem_locked(iocb) \
+ set_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_clear_sem_locked(iocb) \
+ clear_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_is_sem_locked(iocb) \
+ test_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
#endif /* OCFS2_FILE_H */
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 52c7557..a6cc053 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -82,6 +82,7 @@ static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
#define O2HB_DB_TYPE_REGION_LIVENODES 4
#define O2HB_DB_TYPE_REGION_NUMBER 5
#define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6
+#define O2HB_DB_TYPE_REGION_PINNED 7
struct o2hb_debug_buf {
int db_type;
int db_size;
@@ -101,6 +102,7 @@ static struct o2hb_debug_buf *o2hb_db_failedregions;
#define O2HB_DEBUG_FAILEDREGIONS "failed_regions"
#define O2HB_DEBUG_REGION_NUMBER "num"
#define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms"
+#define O2HB_DEBUG_REGION_PINNED "pinned"
static struct dentry *o2hb_debug_dir;
static struct dentry *o2hb_debug_livenodes;
@@ -132,6 +134,33 @@ char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = {
unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL;
+/*
+ * o2hb_dependent_users tracks the number of registered callbacks that depend
+ * on heartbeat. o2net and o2dlm are two entities that register this callback.
+ * However only o2dlm depends on the heartbeat. It does not want the heartbeat
+ * to stop while a dlm domain is still active.
+ */
+unsigned int o2hb_dependent_users;
+
+/*
+ * In global heartbeat mode, all regions are pinned if there are one or more
+ * dependent users and the quorum region count is <= O2HB_PIN_CUT_OFF. All
+ * regions are unpinned if the region count exceeds the cut off or the number
+ * of dependent users falls to zero.
+ */
+#define O2HB_PIN_CUT_OFF 3
+
+/*
+ * In local heartbeat mode, we assume the dlm domain name to be the same as
+ * region uuid. This is true for domains created for the file system but not
+ * necessarily true for userdlm domains. This is a known limitation.
+ *
+ * In global heartbeat mode, we pin/unpin all o2hb regions. This solution
+ * works for both file system and userdlm domains.
+ */
+static int o2hb_region_pin(const char *region_uuid);
+static void o2hb_region_unpin(const char *region_uuid);
+
/* Only sets a new threshold if there are no active regions.
*
* No locking or otherwise interesting code is required for reading
@@ -186,7 +215,9 @@ struct o2hb_region {
struct config_item hr_item;
struct list_head hr_all_item;
- unsigned hr_unclean_stop:1;
+ unsigned hr_unclean_stop:1,
+ hr_item_pinned:1,
+ hr_item_dropped:1;
/* protected by the hr_callback_sem */
struct task_struct *hr_task;
@@ -212,9 +243,11 @@ struct o2hb_region {
struct dentry *hr_debug_livenodes;
struct dentry *hr_debug_regnum;
struct dentry *hr_debug_elapsed_time;
+ struct dentry *hr_debug_pinned;
struct o2hb_debug_buf *hr_db_livenodes;
struct o2hb_debug_buf *hr_db_regnum;
struct o2hb_debug_buf *hr_db_elapsed_time;
+ struct o2hb_debug_buf *hr_db_pinned;
/* let the person setting up hb wait for it to return until it
* has reached a 'steady' state. This will be fixed when we have
@@ -307,8 +340,7 @@ static void o2hb_arm_write_timeout(struct o2hb_region *reg)
static void o2hb_disarm_write_timeout(struct o2hb_region *reg)
{
- cancel_delayed_work(&reg->hr_write_timeout_work);
- flush_scheduled_work();
+ cancel_delayed_work_sync(&reg->hr_write_timeout_work);
}
static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc)
@@ -702,6 +734,14 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg,
config_item_name(&reg->hr_item));
set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
+
+ /*
+ * If global heartbeat active, unpin all regions if the
+ * region count > CUT_OFF
+ */
+ if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+ O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF)
+ o2hb_region_unpin(NULL);
}
static int o2hb_check_slot(struct o2hb_region *reg,
@@ -1042,6 +1082,9 @@ static int o2hb_thread(void *data)
set_user_nice(current, -20);
+ /* Pin node */
+ o2nm_depend_this_node();
+
while (!kthread_should_stop() && !reg->hr_unclean_stop) {
/* We track the time spent inside
* o2hb_do_disk_heartbeat so that we avoid more than
@@ -1091,6 +1134,9 @@ static int o2hb_thread(void *data)
mlog_errno(ret);
}
+ /* Unpin node */
+ o2nm_undepend_this_node();
+
mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n");
return 0;
@@ -1143,6 +1189,12 @@ static int o2hb_debug_open(struct inode *inode, struct file *file)
reg->hr_last_timeout_start));
goto done;
+ case O2HB_DB_TYPE_REGION_PINNED:
+ reg = (struct o2hb_region *)db->db_data;
+ out += snprintf(buf + out, PAGE_SIZE - out, "%u\n",
+ !!reg->hr_item_pinned);
+ goto done;
+
default:
goto done;
}
@@ -1316,6 +1368,8 @@ int o2hb_init(void)
memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap));
memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap));
+ o2hb_dependent_users = 0;
+
return o2hb_debug_init();
}
@@ -1385,6 +1439,7 @@ static void o2hb_region_release(struct config_item *item)
debugfs_remove(reg->hr_debug_livenodes);
debugfs_remove(reg->hr_debug_regnum);
debugfs_remove(reg->hr_debug_elapsed_time);
+ debugfs_remove(reg->hr_debug_pinned);
debugfs_remove(reg->hr_debug_dir);
spin_lock(&o2hb_live_lock);
@@ -1949,6 +2004,18 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
goto bail;
}
+ reg->hr_debug_pinned =
+ o2hb_debug_create(O2HB_DEBUG_REGION_PINNED,
+ reg->hr_debug_dir,
+ &(reg->hr_db_pinned),
+ sizeof(*(reg->hr_db_pinned)),
+ O2HB_DB_TYPE_REGION_PINNED,
+ 0, 0, reg);
+ if (!reg->hr_debug_pinned) {
+ mlog_errno(ret);
+ goto bail;
+ }
+
ret = 0;
bail:
return ret;
@@ -1964,8 +2031,10 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
if (reg == NULL)
return ERR_PTR(-ENOMEM);
- if (strlen(name) > O2HB_MAX_REGION_NAME_LEN)
- return ERR_PTR(-ENAMETOOLONG);
+ if (strlen(name) > O2HB_MAX_REGION_NAME_LEN) {
+ ret = -ENAMETOOLONG;
+ goto free;
+ }
spin_lock(&o2hb_live_lock);
reg->hr_region_num = 0;
@@ -1974,7 +2043,8 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
O2NM_MAX_REGIONS);
if (reg->hr_region_num >= O2NM_MAX_REGIONS) {
spin_unlock(&o2hb_live_lock);
- return ERR_PTR(-EFBIG);
+ ret = -EFBIG;
+ goto free;
}
set_bit(reg->hr_region_num, o2hb_region_bitmap);
}
@@ -1986,10 +2056,13 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
ret = o2hb_debug_region_init(reg, o2hb_debug_dir);
if (ret) {
config_item_put(&reg->hr_item);
- return ERR_PTR(ret);
+ goto free;
}
return &reg->hr_item;
+free:
+ kfree(reg);
+ return ERR_PTR(ret);
}
static void o2hb_heartbeat_group_drop_item(struct config_group *group,
@@ -1997,15 +2070,20 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
{
struct task_struct *hb_task;
struct o2hb_region *reg = to_o2hb_region(item);
+ int quorum_region = 0;
/* stop the thread when the user removes the region dir */
spin_lock(&o2hb_live_lock);
if (o2hb_global_heartbeat_active()) {
clear_bit(reg->hr_region_num, o2hb_region_bitmap);
clear_bit(reg->hr_region_num, o2hb_live_region_bitmap);
+ if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
+ quorum_region = 1;
+ clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
}
hb_task = reg->hr_task;
reg->hr_task = NULL;
+ reg->hr_item_dropped = 1;
spin_unlock(&o2hb_live_lock);
if (hb_task)
@@ -2023,7 +2101,27 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
if (o2hb_global_heartbeat_active())
printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n",
config_item_name(&reg->hr_item));
+
config_item_put(item);
+
+ if (!o2hb_global_heartbeat_active() || !quorum_region)
+ return;
+
+ /*
+ * If global heartbeat active and there are dependent users,
+ * pin all regions if quorum region count <= CUT_OFF
+ */
+ spin_lock(&o2hb_live_lock);
+
+ if (!o2hb_dependent_users)
+ goto unlock;
+
+ if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+ O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
+ o2hb_region_pin(NULL);
+
+unlock:
+ spin_unlock(&o2hb_live_lock);
}
struct o2hb_heartbeat_group_attribute {
@@ -2209,63 +2307,138 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc,
}
EXPORT_SYMBOL_GPL(o2hb_setup_callback);
-static struct o2hb_region *o2hb_find_region(const char *region_uuid)
+/*
+ * In local heartbeat mode, region_uuid passed matches the dlm domain name.
+ * In global heartbeat mode, region_uuid passed is NULL.
+ *
+ * In local, we only pin the matching region. In global we pin all the active
+ * regions.
+ */
+static int o2hb_region_pin(const char *region_uuid)
{
- struct o2hb_region *p, *reg = NULL;
+ int ret = 0, found = 0;
+ struct o2hb_region *reg;
+ char *uuid;
assert_spin_locked(&o2hb_live_lock);
- list_for_each_entry(p, &o2hb_all_regions, hr_all_item) {
- if (!strcmp(region_uuid, config_item_name(&p->hr_item))) {
- reg = p;
- break;
+ list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
+ uuid = config_item_name(&reg->hr_item);
+
+ /* local heartbeat */
+ if (region_uuid) {
+ if (strcmp(region_uuid, uuid))
+ continue;
+ found = 1;
}
+
+ if (reg->hr_item_pinned || reg->hr_item_dropped)
+ goto skip_pin;
+
+ /* Ignore ENOENT only for local hb (userdlm domain) */
+ ret = o2nm_depend_item(&reg->hr_item);
+ if (!ret) {
+ mlog(ML_CLUSTER, "Pin region %s\n", uuid);
+ reg->hr_item_pinned = 1;
+ } else {
+ if (ret == -ENOENT && found)
+ ret = 0;
+ else {
+ mlog(ML_ERROR, "Pin region %s fails with %d\n",
+ uuid, ret);
+ break;
+ }
+ }
+skip_pin:
+ if (found)
+ break;
}
- return reg;
+ return ret;
}
-static int o2hb_region_get(const char *region_uuid)
+/*
+ * In local heartbeat mode, region_uuid passed matches the dlm domain name.
+ * In global heartbeat mode, region_uuid passed is NULL.
+ *
+ * In local, we only unpin the matching region. In global we unpin all the
+ * active regions.
+ */
+static void o2hb_region_unpin(const char *region_uuid)
{
- int ret = 0;
struct o2hb_region *reg;
+ char *uuid;
+ int found = 0;
- spin_lock(&o2hb_live_lock);
+ assert_spin_locked(&o2hb_live_lock);
- reg = o2hb_find_region(region_uuid);
- if (!reg)
- ret = -ENOENT;
- spin_unlock(&o2hb_live_lock);
+ list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
+ uuid = config_item_name(&reg->hr_item);
+ if (region_uuid) {
+ if (strcmp(region_uuid, uuid))
+ continue;
+ found = 1;
+ }
- if (ret)
- goto out;
+ if (reg->hr_item_pinned) {
+ mlog(ML_CLUSTER, "Unpin region %s\n", uuid);
+ o2nm_undepend_item(&reg->hr_item);
+ reg->hr_item_pinned = 0;
+ }
+ if (found)
+ break;
+ }
+}
- ret = o2nm_depend_this_node();
- if (ret)
- goto out;
+static int o2hb_region_inc_user(const char *region_uuid)
+{
+ int ret = 0;
- ret = o2nm_depend_item(&reg->hr_item);
- if (ret)
- o2nm_undepend_this_node();
+ spin_lock(&o2hb_live_lock);
-out:
+ /* local heartbeat */
+ if (!o2hb_global_heartbeat_active()) {
+ ret = o2hb_region_pin(region_uuid);
+ goto unlock;
+ }
+
+ /*
+ * if global heartbeat active and this is the first dependent user,
+ * pin all regions if quorum region count <= CUT_OFF
+ */
+ o2hb_dependent_users++;
+ if (o2hb_dependent_users > 1)
+ goto unlock;
+
+ if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+ O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
+ ret = o2hb_region_pin(NULL);
+
+unlock:
+ spin_unlock(&o2hb_live_lock);
return ret;
}
-static void o2hb_region_put(const char *region_uuid)
+void o2hb_region_dec_user(const char *region_uuid)
{
- struct o2hb_region *reg;
-
spin_lock(&o2hb_live_lock);
- reg = o2hb_find_region(region_uuid);
+ /* local heartbeat */
+ if (!o2hb_global_heartbeat_active()) {
+ o2hb_region_unpin(region_uuid);
+ goto unlock;
+ }
- spin_unlock(&o2hb_live_lock);
+ /*
+ * if global heartbeat active and there are no dependent users,
+ * unpin all quorum regions
+ */
+ o2hb_dependent_users--;
+ if (!o2hb_dependent_users)
+ o2hb_region_unpin(NULL);
- if (reg) {
- o2nm_undepend_item(&reg->hr_item);
- o2nm_undepend_this_node();
- }
+unlock:
+ spin_unlock(&o2hb_live_lock);
}
int o2hb_register_callback(const char *region_uuid,
@@ -2286,9 +2459,11 @@ int o2hb_register_callback(const char *region_uuid,
}
if (region_uuid) {
- ret = o2hb_region_get(region_uuid);
- if (ret)
+ ret = o2hb_region_inc_user(region_uuid);
+ if (ret) {
+ mlog_errno(ret);
goto out;
+ }
}
down_write(&o2hb_callback_sem);
@@ -2306,7 +2481,7 @@ int o2hb_register_callback(const char *region_uuid,
up_write(&o2hb_callback_sem);
ret = 0;
out:
- mlog(ML_HEARTBEAT, "returning %d on behalf of %p for funcs %p\n",
+ mlog(ML_CLUSTER, "returning %d on behalf of %p for funcs %p\n",
ret, __builtin_return_address(0), hc);
return ret;
}
@@ -2317,7 +2492,7 @@ void o2hb_unregister_callback(const char *region_uuid,
{
BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
- mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n",
+ mlog(ML_CLUSTER, "on behalf of %p for funcs %p\n",
__builtin_return_address(0), hc);
/* XXX Can this happen _with_ a region reference? */
@@ -2325,7 +2500,7 @@ void o2hb_unregister_callback(const char *region_uuid,
return;
if (region_uuid)
- o2hb_region_put(region_uuid);
+ o2hb_region_dec_user(region_uuid);
down_write(&o2hb_callback_sem);
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index c7fba39..6c61771 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -113,10 +113,11 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
define_mask(QUOTA),
define_mask(REFCOUNT),
define_mask(BASTS),
+ define_mask(RESERVATIONS),
+ define_mask(CLUSTER),
define_mask(ERROR),
define_mask(NOTICE),
define_mask(KTHREAD),
- define_mask(RESERVATIONS),
};
static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index ea2ed9f..34d6544 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -81,7 +81,7 @@
#include <linux/sched.h>
/* bits that are frequently given and infrequently matched in the low word */
-/* NOTE: If you add a flag, you need to also update mlog.c! */
+/* NOTE: If you add a flag, you need to also update masklog.c! */
#define ML_ENTRY 0x0000000000000001ULL /* func call entry */
#define ML_EXIT 0x0000000000000002ULL /* func call exit */
#define ML_TCP 0x0000000000000004ULL /* net cluster/tcp.c */
@@ -114,13 +114,14 @@
#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */
#define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */
-#define ML_BASTS 0x0000001000000000ULL /* dlmglue asts and basts */
+#define ML_BASTS 0x0000000100000000ULL /* dlmglue asts and basts */
+#define ML_RESERVATIONS 0x0000000200000000ULL /* ocfs2 alloc reservations */
+#define ML_CLUSTER 0x0000000400000000ULL /* cluster stack */
+
/* bits that are infrequently given and frequently matched in the high word */
-#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
-#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */
-#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */
-#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
-#define ML_CLUSTER 0x0000001000000000ULL /* cluster stack */
+#define ML_ERROR 0x1000000000000000ULL /* sent to KERN_ERR */
+#define ML_NOTICE 0x2000000000000000ULL /* setn to KERN_NOTICE */
+#define ML_KTHREAD 0x4000000000000000ULL /* kernel thread activity */
#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index a3f150e..3a583590 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -46,10 +46,15 @@
#define O2NET_DEBUG_DIR "o2net"
#define SC_DEBUG_NAME "sock_containers"
#define NST_DEBUG_NAME "send_tracking"
+#define STATS_DEBUG_NAME "stats"
+
+#define SHOW_SOCK_CONTAINERS 0
+#define SHOW_SOCK_STATS 1
static struct dentry *o2net_dentry;
static struct dentry *sc_dentry;
static struct dentry *nst_dentry;
+static struct dentry *stats_dentry;
static DEFINE_SPINLOCK(o2net_debug_lock);
@@ -123,37 +128,42 @@ static void *nst_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static int nst_seq_show(struct seq_file *seq, void *v)
{
struct o2net_send_tracking *nst, *dummy_nst = seq->private;
+ ktime_t now;
+ s64 sock, send, status;
spin_lock(&o2net_debug_lock);
nst = next_nst(dummy_nst);
+ if (!nst)
+ goto out;
- if (nst != NULL) {
- /* get_task_comm isn't exported. oh well. */
- seq_printf(seq, "%p:\n"
- " pid: %lu\n"
- " tgid: %lu\n"
- " process name: %s\n"
- " node: %u\n"
- " sc: %p\n"
- " message id: %d\n"
- " message type: %u\n"
- " message key: 0x%08x\n"
- " sock acquiry: %lu.%ld\n"
- " send start: %lu.%ld\n"
- " wait start: %lu.%ld\n",
- nst, (unsigned long)nst->st_task->pid,
- (unsigned long)nst->st_task->tgid,
- nst->st_task->comm, nst->st_node,
- nst->st_sc, nst->st_id, nst->st_msg_type,
- nst->st_msg_key,
- nst->st_sock_time.tv_sec,
- (long)nst->st_sock_time.tv_usec,
- nst->st_send_time.tv_sec,
- (long)nst->st_send_time.tv_usec,
- nst->st_status_time.tv_sec,
- (long)nst->st_status_time.tv_usec);
- }
+ now = ktime_get();
+ sock = ktime_to_us(ktime_sub(now, nst->st_sock_time));
+ send = ktime_to_us(ktime_sub(now, nst->st_send_time));
+ status = ktime_to_us(ktime_sub(now, nst->st_status_time));
+
+ /* get_task_comm isn't exported. oh well. */
+ seq_printf(seq, "%p:\n"
+ " pid: %lu\n"
+ " tgid: %lu\n"
+ " process name: %s\n"
+ " node: %u\n"
+ " sc: %p\n"
+ " message id: %d\n"
+ " message type: %u\n"
+ " message key: 0x%08x\n"
+ " sock acquiry: %lld usecs ago\n"
+ " send start: %lld usecs ago\n"
+ " wait start: %lld usecs ago\n",
+ nst, (unsigned long)task_pid_nr(nst->st_task),
+ (unsigned long)nst->st_task->tgid,
+ nst->st_task->comm, nst->st_node,
+ nst->st_sc, nst->st_id, nst->st_msg_type,
+ nst->st_msg_key,
+ (long long)sock,
+ (long long)send,
+ (long long)status);
+out:
spin_unlock(&o2net_debug_lock);
return 0;
@@ -228,6 +238,11 @@ void o2net_debug_del_sc(struct o2net_sock_container *sc)
spin_unlock(&o2net_debug_lock);
}
+struct o2net_sock_debug {
+ int dbg_ctxt;
+ struct o2net_sock_container *dbg_sock;
+};
+
static struct o2net_sock_container
*next_sc(struct o2net_sock_container *sc_start)
{
@@ -253,7 +268,8 @@ static struct o2net_sock_container
static void *sc_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct o2net_sock_container *sc, *dummy_sc = seq->private;
+ struct o2net_sock_debug *sd = seq->private;
+ struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
spin_lock(&o2net_debug_lock);
sc = next_sc(dummy_sc);
@@ -264,7 +280,8 @@ static void *sc_seq_start(struct seq_file *seq, loff_t *pos)
static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct o2net_sock_container *sc, *dummy_sc = seq->private;
+ struct o2net_sock_debug *sd = seq->private;
+ struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
spin_lock(&o2net_debug_lock);
sc = next_sc(dummy_sc);
@@ -276,65 +293,107 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
return sc; /* unused, just needs to be null when done */
}
-#define TV_SEC_USEC(TV) TV.tv_sec, (long)TV.tv_usec
+#ifdef CONFIG_OCFS2_FS_STATS
+# define sc_send_count(_s) ((_s)->sc_send_count)
+# define sc_recv_count(_s) ((_s)->sc_recv_count)
+# define sc_tv_acquiry_total_ns(_s) (ktime_to_ns((_s)->sc_tv_acquiry_total))
+# define sc_tv_send_total_ns(_s) (ktime_to_ns((_s)->sc_tv_send_total))
+# define sc_tv_status_total_ns(_s) (ktime_to_ns((_s)->sc_tv_status_total))
+# define sc_tv_process_total_ns(_s) (ktime_to_ns((_s)->sc_tv_process_total))
+#else
+# define sc_send_count(_s) (0U)
+# define sc_recv_count(_s) (0U)
+# define sc_tv_acquiry_total_ns(_s) (0LL)
+# define sc_tv_send_total_ns(_s) (0LL)
+# define sc_tv_status_total_ns(_s) (0LL)
+# define sc_tv_process_total_ns(_s) (0LL)
+#endif
+
+/* So that debugfs.ocfs2 can determine which format is being used */
+#define O2NET_STATS_STR_VERSION 1
+static void sc_show_sock_stats(struct seq_file *seq,
+ struct o2net_sock_container *sc)
+{
+ if (!sc)
+ return;
+
+ seq_printf(seq, "%d,%u,%lu,%lld,%lld,%lld,%lu,%lld\n", O2NET_STATS_STR_VERSION,
+ sc->sc_node->nd_num, (unsigned long)sc_send_count(sc),
+ (long long)sc_tv_acquiry_total_ns(sc),
+ (long long)sc_tv_send_total_ns(sc),
+ (long long)sc_tv_status_total_ns(sc),
+ (unsigned long)sc_recv_count(sc),
+ (long long)sc_tv_process_total_ns(sc));
+}
+
+static void sc_show_sock_container(struct seq_file *seq,
+ struct o2net_sock_container *sc)
+{
+ struct inet_sock *inet = NULL;
+ __be32 saddr = 0, daddr = 0;
+ __be16 sport = 0, dport = 0;
+
+ if (!sc)
+ return;
+
+ if (sc->sc_sock) {
+ inet = inet_sk(sc->sc_sock->sk);
+ /* the stack's structs aren't sparse endian clean */
+ saddr = (__force __be32)inet->inet_saddr;
+ daddr = (__force __be32)inet->inet_daddr;
+ sport = (__force __be16)inet->inet_sport;
+ dport = (__force __be16)inet->inet_dport;
+ }
+
+ /* XXX sigh, inet-> doesn't have sparse annotation so any
+ * use of it here generates a warning with -Wbitwise */
+ seq_printf(seq, "%p:\n"
+ " krefs: %d\n"
+ " sock: %pI4:%u -> "
+ "%pI4:%u\n"
+ " remote node: %s\n"
+ " page off: %zu\n"
+ " handshake ok: %u\n"
+ " timer: %lld usecs\n"
+ " data ready: %lld usecs\n"
+ " advance start: %lld usecs\n"
+ " advance stop: %lld usecs\n"
+ " func start: %lld usecs\n"
+ " func stop: %lld usecs\n"
+ " func key: 0x%08x\n"
+ " func type: %u\n",
+ sc,
+ atomic_read(&sc->sc_kref.refcount),
+ &saddr, inet ? ntohs(sport) : 0,
+ &daddr, inet ? ntohs(dport) : 0,
+ sc->sc_node->nd_name,
+ sc->sc_page_off,
+ sc->sc_handshake_ok,
+ (long long)ktime_to_us(sc->sc_tv_timer),
+ (long long)ktime_to_us(sc->sc_tv_data_ready),
+ (long long)ktime_to_us(sc->sc_tv_advance_start),
+ (long long)ktime_to_us(sc->sc_tv_advance_stop),
+ (long long)ktime_to_us(sc->sc_tv_func_start),
+ (long long)ktime_to_us(sc->sc_tv_func_stop),
+ sc->sc_msg_key,
+ sc->sc_msg_type);
+}
static int sc_seq_show(struct seq_file *seq, void *v)
{
- struct o2net_sock_container *sc, *dummy_sc = seq->private;
+ struct o2net_sock_debug *sd = seq->private;
+ struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
spin_lock(&o2net_debug_lock);
sc = next_sc(dummy_sc);
- if (sc != NULL) {
- struct inet_sock *inet = NULL;
-
- __be32 saddr = 0, daddr = 0;
- __be16 sport = 0, dport = 0;
-
- if (sc->sc_sock) {
- inet = inet_sk(sc->sc_sock->sk);
- /* the stack's structs aren't sparse endian clean */
- saddr = (__force __be32)inet->inet_saddr;
- daddr = (__force __be32)inet->inet_daddr;
- sport = (__force __be16)inet->inet_sport;
- dport = (__force __be16)inet->inet_dport;
- }
-
- /* XXX sigh, inet-> doesn't have sparse annotation so any
- * use of it here generates a warning with -Wbitwise */
- seq_printf(seq, "%p:\n"
- " krefs: %d\n"
- " sock: %pI4:%u -> "
- "%pI4:%u\n"
- " remote node: %s\n"
- " page off: %zu\n"
- " handshake ok: %u\n"
- " timer: %lu.%ld\n"
- " data ready: %lu.%ld\n"
- " advance start: %lu.%ld\n"
- " advance stop: %lu.%ld\n"
- " func start: %lu.%ld\n"
- " func stop: %lu.%ld\n"
- " func key: %u\n"
- " func type: %u\n",
- sc,
- atomic_read(&sc->sc_kref.refcount),
- &saddr, inet ? ntohs(sport) : 0,
- &daddr, inet ? ntohs(dport) : 0,
- sc->sc_node->nd_name,
- sc->sc_page_off,
- sc->sc_handshake_ok,
- TV_SEC_USEC(sc->sc_tv_timer),
- TV_SEC_USEC(sc->sc_tv_data_ready),
- TV_SEC_USEC(sc->sc_tv_advance_start),
- TV_SEC_USEC(sc->sc_tv_advance_stop),
- TV_SEC_USEC(sc->sc_tv_func_start),
- TV_SEC_USEC(sc->sc_tv_func_stop),
- sc->sc_msg_key,
- sc->sc_msg_type);
+ if (sc) {
+ if (sd->dbg_ctxt == SHOW_SOCK_CONTAINERS)
+ sc_show_sock_container(seq, sc);
+ else
+ sc_show_sock_stats(seq, sc);
}
-
spin_unlock(&o2net_debug_lock);
return 0;
@@ -351,7 +410,7 @@ static const struct seq_operations sc_seq_ops = {
.show = sc_seq_show,
};
-static int sc_fop_open(struct inode *inode, struct file *file)
+static int sc_common_open(struct file *file, struct o2net_sock_debug *sd)
{
struct o2net_sock_container *dummy_sc;
struct seq_file *seq;
@@ -369,7 +428,8 @@ static int sc_fop_open(struct inode *inode, struct file *file)
goto out;
seq = file->private_data;
- seq->private = dummy_sc;
+ seq->private = sd;
+ sd->dbg_sock = dummy_sc;
o2net_debug_add_sc(dummy_sc);
dummy_sc = NULL;
@@ -382,12 +442,48 @@ out:
static int sc_fop_release(struct inode *inode, struct file *file)
{
struct seq_file *seq = file->private_data;
- struct o2net_sock_container *dummy_sc = seq->private;
+ struct o2net_sock_debug *sd = seq->private;
+ struct o2net_sock_container *dummy_sc = sd->dbg_sock;
o2net_debug_del_sc(dummy_sc);
return seq_release_private(inode, file);
}
+static int stats_fop_open(struct inode *inode, struct file *file)
+{
+ struct o2net_sock_debug *sd;
+
+ sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL);
+ if (sd == NULL)
+ return -ENOMEM;
+
+ sd->dbg_ctxt = SHOW_SOCK_STATS;
+ sd->dbg_sock = NULL;
+
+ return sc_common_open(file, sd);
+}
+
+static const struct file_operations stats_seq_fops = {
+ .open = stats_fop_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = sc_fop_release,
+};
+
+static int sc_fop_open(struct inode *inode, struct file *file)
+{
+ struct o2net_sock_debug *sd;
+
+ sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL);
+ if (sd == NULL)
+ return -ENOMEM;
+
+ sd->dbg_ctxt = SHOW_SOCK_CONTAINERS;
+ sd->dbg_sock = NULL;
+
+ return sc_common_open(file, sd);
+}
+
static const struct file_operations sc_seq_fops = {
.open = sc_fop_open,
.read = seq_read,
@@ -419,25 +515,29 @@ int o2net_debugfs_init(void)
goto bail;
}
+ stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, S_IFREG|S_IRUSR,
+ o2net_dentry, NULL,
+ &stats_seq_fops);
+ if (!stats_dentry) {
+ mlog_errno(-ENOMEM);
+ goto bail;
+ }
+
return 0;
bail:
- if (sc_dentry)
- debugfs_remove(sc_dentry);
- if (nst_dentry)
- debugfs_remove(nst_dentry);
- if (o2net_dentry)
- debugfs_remove(o2net_dentry);
+ debugfs_remove(stats_dentry);
+ debugfs_remove(sc_dentry);
+ debugfs_remove(nst_dentry);
+ debugfs_remove(o2net_dentry);
return -ENOMEM;
}
void o2net_debugfs_exit(void)
{
- if (sc_dentry)
- debugfs_remove(sc_dentry);
- if (nst_dentry)
- debugfs_remove(nst_dentry);
- if (o2net_dentry)
- debugfs_remove(o2net_dentry);
+ debugfs_remove(stats_dentry);
+ debugfs_remove(sc_dentry);
+ debugfs_remove(nst_dentry);
+ debugfs_remove(o2net_dentry);
}
#endif /* CONFIG_DEBUG_FS */
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index cf3e166..a873667 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -325,5 +325,7 @@ void o2quo_init(void)
void o2quo_exit(void)
{
- flush_scheduled_work();
+ struct o2quo_state *qs = &o2quo_state;
+
+ flush_work_sync(&qs->qs_work);
}
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 9aa426e..3b11cb1 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -153,63 +153,114 @@ static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
nst->st_node = node;
}
-static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
{
- do_gettimeofday(&nst->st_sock_time);
+ nst->st_sock_time = ktime_get();
}
-static void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
{
- do_gettimeofday(&nst->st_send_time);
+ nst->st_send_time = ktime_get();
}
-static void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
{
- do_gettimeofday(&nst->st_status_time);
+ nst->st_status_time = ktime_get();
}
-static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
- struct o2net_sock_container *sc)
+static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
+ struct o2net_sock_container *sc)
{
nst->st_sc = sc;
}
-static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id)
+static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
+ u32 msg_id)
{
nst->st_id = msg_id;
}
-#else /* CONFIG_DEBUG_FS */
-
-static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
- u32 msgkey, struct task_struct *task, u8 node)
+static inline void o2net_set_sock_timer(struct o2net_sock_container *sc)
{
+ sc->sc_tv_timer = ktime_get();
}
-static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_data_ready_time(struct o2net_sock_container *sc)
{
+ sc->sc_tv_data_ready = ktime_get();
}
-static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_advance_start_time(struct o2net_sock_container *sc)
{
+ sc->sc_tv_advance_start = ktime_get();
}
-static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_advance_stop_time(struct o2net_sock_container *sc)
{
+ sc->sc_tv_advance_stop = ktime_get();
}
-static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
- struct o2net_sock_container *sc)
+static inline void o2net_set_func_start_time(struct o2net_sock_container *sc)
{
+ sc->sc_tv_func_start = ktime_get();
}
-static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
- u32 msg_id)
+static inline void o2net_set_func_stop_time(struct o2net_sock_container *sc)
{
+ sc->sc_tv_func_stop = ktime_get();
}
+static ktime_t o2net_get_func_run_time(struct o2net_sock_container *sc)
+{
+ return ktime_sub(sc->sc_tv_func_stop, sc->sc_tv_func_start);
+}
+#else /* CONFIG_DEBUG_FS */
+# define o2net_init_nst(a, b, c, d, e)
+# define o2net_set_nst_sock_time(a)
+# define o2net_set_nst_send_time(a)
+# define o2net_set_nst_status_time(a)
+# define o2net_set_nst_sock_container(a, b)
+# define o2net_set_nst_msg_id(a, b)
+# define o2net_set_sock_timer(a)
+# define o2net_set_data_ready_time(a)
+# define o2net_set_advance_start_time(a)
+# define o2net_set_advance_stop_time(a)
+# define o2net_set_func_start_time(a)
+# define o2net_set_func_stop_time(a)
+# define o2net_get_func_run_time(a) (ktime_t)0
#endif /* CONFIG_DEBUG_FS */
+#ifdef CONFIG_OCFS2_FS_STATS
+static void o2net_update_send_stats(struct o2net_send_tracking *nst,
+ struct o2net_sock_container *sc)
+{
+ sc->sc_tv_status_total = ktime_add(sc->sc_tv_status_total,
+ ktime_sub(ktime_get(),
+ nst->st_status_time));
+ sc->sc_tv_send_total = ktime_add(sc->sc_tv_send_total,
+ ktime_sub(nst->st_status_time,
+ nst->st_send_time));
+ sc->sc_tv_acquiry_total = ktime_add(sc->sc_tv_acquiry_total,
+ ktime_sub(nst->st_send_time,
+ nst->st_sock_time));
+ sc->sc_send_count++;
+}
+
+static void o2net_update_recv_stats(struct o2net_sock_container *sc)
+{
+ sc->sc_tv_process_total = ktime_add(sc->sc_tv_process_total,
+ o2net_get_func_run_time(sc));
+ sc->sc_recv_count++;
+}
+
+#else
+
+# define o2net_update_send_stats(a, b)
+
+# define o2net_update_recv_stats(sc)
+
+#endif /* CONFIG_OCFS2_FS_STATS */
+
static inline int o2net_reconnect_delay(void)
{
return o2nm_single_cluster->cl_reconnect_delay_ms;
@@ -355,6 +406,7 @@ static void sc_kref_release(struct kref *kref)
sc->sc_sock = NULL;
}
+ o2nm_undepend_item(&sc->sc_node->nd_item);
o2nm_node_put(sc->sc_node);
sc->sc_node = NULL;
@@ -376,6 +428,7 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
{
struct o2net_sock_container *sc, *ret = NULL;
struct page *page = NULL;
+ int status = 0;
page = alloc_page(GFP_NOFS);
sc = kzalloc(sizeof(*sc), GFP_NOFS);
@@ -386,6 +439,13 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
o2nm_node_get(node);
sc->sc_node = node;
+ /* pin the node item of the remote node */
+ status = o2nm_depend_item(&node->nd_item);
+ if (status) {
+ mlog_errno(status);
+ o2nm_node_put(node);
+ goto out;
+ }
INIT_WORK(&sc->sc_connect_work, o2net_sc_connect_completed);
INIT_WORK(&sc->sc_rx_work, o2net_rx_until_empty);
INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc);
@@ -546,7 +606,7 @@ static void o2net_data_ready(struct sock *sk, int bytes)
if (sk->sk_user_data) {
struct o2net_sock_container *sc = sk->sk_user_data;
sclog(sc, "data_ready hit\n");
- do_gettimeofday(&sc->sc_tv_data_ready);
+ o2net_set_data_ready_time(sc);
o2net_sc_queue_work(sc, &sc->sc_rx_work);
ready = sc->sc_data_ready;
} else {
@@ -1070,6 +1130,8 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
o2net_set_nst_status_time(&nst);
wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw));
+ o2net_update_send_stats(&nst, sc);
+
/* Note that we avoid overwriting the callers status return
* variable if a system error was reported on the other
* side. Callers beware. */
@@ -1183,13 +1245,15 @@ static int o2net_process_message(struct o2net_sock_container *sc,
if (syserr != O2NET_ERR_NONE)
goto out_respond;
- do_gettimeofday(&sc->sc_tv_func_start);
+ o2net_set_func_start_time(sc);
sc->sc_msg_key = be32_to_cpu(hdr->key);
sc->sc_msg_type = be16_to_cpu(hdr->msg_type);
handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) +
be16_to_cpu(hdr->data_len),
nmh->nh_func_data, &ret_data);
- do_gettimeofday(&sc->sc_tv_func_stop);
+ o2net_set_func_stop_time(sc);
+
+ o2net_update_recv_stats(sc);
out_respond:
/* this destroys the hdr, so don't use it after this */
@@ -1300,7 +1364,7 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
size_t datalen;
sclog(sc, "receiving\n");
- do_gettimeofday(&sc->sc_tv_advance_start);
+ o2net_set_advance_start_time(sc);
if (unlikely(sc->sc_handshake_ok == 0)) {
if(sc->sc_page_off < sizeof(struct o2net_handshake)) {
@@ -1375,7 +1439,7 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
out:
sclog(sc, "ret = %d\n", ret);
- do_gettimeofday(&sc->sc_tv_advance_stop);
+ o2net_set_advance_stop_time(sc);
return ret;
}
@@ -1475,27 +1539,28 @@ static void o2net_idle_timer(unsigned long data)
{
struct o2net_sock_container *sc = (struct o2net_sock_container *)data;
struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
- struct timeval now;
- do_gettimeofday(&now);
+#ifdef CONFIG_DEBUG_FS
+ ktime_t now = ktime_get();
+#endif
printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
"seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
o2net_idle_timeout() / 1000,
o2net_idle_timeout() % 1000);
- mlog(ML_NOTICE, "here are some times that might help debug the "
- "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
- "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
- sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec,
- now.tv_sec, (long) now.tv_usec,
- sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec,
- sc->sc_tv_advance_start.tv_sec,
- (long) sc->sc_tv_advance_start.tv_usec,
- sc->sc_tv_advance_stop.tv_sec,
- (long) sc->sc_tv_advance_stop.tv_usec,
+
+#ifdef CONFIG_DEBUG_FS
+ mlog(ML_NOTICE, "Here are some times that might help debug the "
+ "situation: (Timer: %lld, Now %lld, DataReady %lld, Advance %lld-%lld, "
+ "Key 0x%08x, Func %u, FuncTime %lld-%lld)\n",
+ (long long)ktime_to_us(sc->sc_tv_timer), (long long)ktime_to_us(now),
+ (long long)ktime_to_us(sc->sc_tv_data_ready),
+ (long long)ktime_to_us(sc->sc_tv_advance_start),
+ (long long)ktime_to_us(sc->sc_tv_advance_stop),
sc->sc_msg_key, sc->sc_msg_type,
- sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec,
- sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec);
+ (long long)ktime_to_us(sc->sc_tv_func_start),
+ (long long)ktime_to_us(sc->sc_tv_func_stop));
+#endif
/*
* Initialize the nn_timeout so that the next connection attempt
@@ -1511,7 +1576,7 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work,
msecs_to_jiffies(o2net_keepalive_delay()));
- do_gettimeofday(&sc->sc_tv_timer);
+ o2net_set_sock_timer(sc);
mod_timer(&sc->sc_idle_timeout,
jiffies + msecs_to_jiffies(o2net_idle_timeout()));
}
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 15fdbdf..4cbcb65 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -166,18 +166,27 @@ struct o2net_sock_container {
/* original handlers for the sockets */
void (*sc_state_change)(struct sock *sk);
void (*sc_data_ready)(struct sock *sk, int bytes);
-#ifdef CONFIG_DEBUG_FS
- struct list_head sc_net_debug_item;
-#endif
- struct timeval sc_tv_timer;
- struct timeval sc_tv_data_ready;
- struct timeval sc_tv_advance_start;
- struct timeval sc_tv_advance_stop;
- struct timeval sc_tv_func_start;
- struct timeval sc_tv_func_stop;
+
u32 sc_msg_key;
u16 sc_msg_type;
+#ifdef CONFIG_DEBUG_FS
+ struct list_head sc_net_debug_item;
+ ktime_t sc_tv_timer;
+ ktime_t sc_tv_data_ready;
+ ktime_t sc_tv_advance_start;
+ ktime_t sc_tv_advance_stop;
+ ktime_t sc_tv_func_start;
+ ktime_t sc_tv_func_stop;
+#endif
+#ifdef CONFIG_OCFS2_FS_STATS
+ ktime_t sc_tv_acquiry_total;
+ ktime_t sc_tv_send_total;
+ ktime_t sc_tv_status_total;
+ u32 sc_send_count;
+ u32 sc_recv_count;
+ ktime_t sc_tv_process_total;
+#endif
struct mutex sc_send_lock;
};
@@ -220,9 +229,9 @@ struct o2net_send_tracking {
u32 st_msg_type;
u32 st_msg_key;
u8 st_node;
- struct timeval st_sock_time;
- struct timeval st_send_time;
- struct timeval st_status_time;
+ ktime_t st_sock_time;
+ ktime_t st_send_time;
+ ktime_t st_status_time;
};
#else
struct o2net_send_tracking {
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index edaded4..6d80ecc 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -52,9 +52,15 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry)
static int ocfs2_dentry_revalidate(struct dentry *dentry,
struct nameidata *nd)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode;
int ret = 0; /* if all else fails, just return false */
- struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
+ struct ocfs2_super *osb;
+
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ inode = dentry->d_inode;
+ osb = OCFS2_SB(dentry->d_sb);
mlog_entry("(0x%p, '%.*s')\n", dentry,
dentry->d_name.len, dentry->d_name.name);
@@ -169,23 +175,25 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
struct list_head *p;
struct dentry *dentry = NULL;
- spin_lock(&dcache_lock);
-
+ spin_lock(&inode->i_lock);
list_for_each(p, &inode->i_dentry) {
dentry = list_entry(p, struct dentry, d_alias);
+ spin_lock(&dentry->d_lock);
if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
mlog(0, "dentry found: %.*s\n",
dentry->d_name.len, dentry->d_name.name);
- dget_locked(dentry);
+ dget_dlock(dentry);
+ spin_unlock(&dentry->d_lock);
break;
}
+ spin_unlock(&dentry->d_lock);
dentry = NULL;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
return dentry;
}
@@ -476,7 +484,6 @@ static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
out:
iput(inode);
- ocfs2_dentry_attach_gen(dentry);
}
/*
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index c49f6de..d417b3f 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2461,8 +2461,10 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
di->i_dx_root = cpu_to_le64(dr_blkno);
+ spin_lock(&OCFS2_I(dir)->ip_lock);
OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL;
di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
+ spin_unlock(&OCFS2_I(dir)->ip_lock);
ocfs2_journal_dirty(handle, di_bh);
@@ -4466,8 +4468,10 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
goto out_commit;
}
+ spin_lock(&OCFS2_I(dir)->ip_lock);
OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL;
di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
+ spin_unlock(&OCFS2_I(dir)->ip_lock);
di->i_dx_root = cpu_to_le64(0ULL);
ocfs2_journal_dirty(handle, di_bh);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index f449991..3a3ed4b 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -90,19 +90,29 @@ static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
{
- mlog_entry_void();
+ struct dlm_lock_resource *res;
BUG_ON(!dlm);
BUG_ON(!lock);
+ res = lock->lockres;
+
assert_spin_locked(&dlm->ast_lock);
+
if (!list_empty(&lock->ast_list)) {
- mlog(ML_ERROR, "ast list not empty!! pending=%d, newlevel=%d\n",
+ mlog(ML_ERROR, "%s: res %.*s, lock %u:%llu, "
+ "AST list not empty, pending %d, newlevel %d\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
lock->ast_pending, lock->ml.type);
BUG();
}
if (lock->ast_pending)
- mlog(0, "lock has an ast getting flushed right now\n");
+ mlog(0, "%s: res %.*s, lock %u:%llu, AST getting flushed\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
/* putting lock on list, add a ref */
dlm_lock_get(lock);
@@ -110,9 +120,10 @@ void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
/* check to see if this ast obsoletes the bast */
if (dlm_should_cancel_bast(dlm, lock)) {
- struct dlm_lock_resource *res = lock->lockres;
- mlog(0, "%s: cancelling bast for %.*s\n",
- dlm->name, res->lockname.len, res->lockname.name);
+ mlog(0, "%s: res %.*s, lock %u:%llu, Cancelling BAST\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
lock->bast_pending = 0;
list_del_init(&lock->bast_list);
lock->ml.highest_blocked = LKM_IVMODE;
@@ -134,8 +145,6 @@ void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
{
- mlog_entry_void();
-
BUG_ON(!dlm);
BUG_ON(!lock);
@@ -147,15 +156,21 @@ void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
{
- mlog_entry_void();
+ struct dlm_lock_resource *res;
BUG_ON(!dlm);
BUG_ON(!lock);
+
assert_spin_locked(&dlm->ast_lock);
+ res = lock->lockres;
+
BUG_ON(!list_empty(&lock->bast_list));
if (lock->bast_pending)
- mlog(0, "lock has a bast getting flushed right now\n");
+ mlog(0, "%s: res %.*s, lock %u:%llu, BAST getting flushed\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
/* putting lock on list, add a ref */
dlm_lock_get(lock);
@@ -167,8 +182,6 @@ void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
{
- mlog_entry_void();
-
BUG_ON(!dlm);
BUG_ON(!lock);
@@ -213,7 +226,10 @@ void dlm_do_local_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
dlm_astlockfunc_t *fn;
struct dlm_lockstatus *lksb;
- mlog_entry_void();
+ mlog(0, "%s: res %.*s, lock %u:%llu, Local AST\n", dlm->name,
+ res->lockname.len, res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
lksb = lock->lksb;
fn = lock->ast;
@@ -231,7 +247,10 @@ int dlm_do_remote_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
struct dlm_lockstatus *lksb;
int lksbflags;
- mlog_entry_void();
+ mlog(0, "%s: res %.*s, lock %u:%llu, Remote AST\n", dlm->name,
+ res->lockname.len, res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
lksb = lock->lksb;
BUG_ON(lock->ml.node == dlm->node_num);
@@ -250,9 +269,14 @@ void dlm_do_local_bast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
{
dlm_bastlockfunc_t *fn = lock->bast;
- mlog_entry_void();
BUG_ON(lock->ml.node != dlm->node_num);
+ mlog(0, "%s: res %.*s, lock %u:%llu, Local BAST, blocked %d\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
+ blocked_type);
+
(*fn)(lock->astdata, blocked_type);
}
@@ -332,7 +356,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
/* cannot get a proxy ast message if this node owns it */
BUG_ON(res->owner == dlm->node_num);
- mlog(0, "lockres %.*s\n", res->lockname.len, res->lockname.name);
+ mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len,
+ res->lockname.name);
spin_lock(&res->spinlock);
if (res->state & DLM_LOCK_RES_RECOVERING) {
@@ -382,8 +407,12 @@ do_ast:
if (past->type == DLM_AST) {
/* do not alter lock refcount. switching lists. */
list_move_tail(&lock->list, &res->granted);
- mlog(0, "ast: Adding to granted list... type=%d, "
- "convert_type=%d\n", lock->ml.type, lock->ml.convert_type);
+ mlog(0, "%s: res %.*s, lock %u:%llu, Granted type %d => %d\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
+ lock->ml.type, lock->ml.convert_type);
+
if (lock->ml.convert_type != LKM_IVMODE) {
lock->ml.type = lock->ml.convert_type;
lock->ml.convert_type = LKM_IVMODE;
@@ -426,9 +455,9 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
size_t veclen = 1;
int status;
- mlog_entry("res %.*s, to=%u, type=%d, blocked_type=%d\n",
- res->lockname.len, res->lockname.name, lock->ml.node,
- msg_type, blocked_type);
+ mlog(0, "%s: res %.*s, to %u, type %d, blocked_type %d\n", dlm->name,
+ res->lockname.len, res->lockname.name, lock->ml.node, msg_type,
+ blocked_type);
memset(&past, 0, sizeof(struct dlm_proxy_ast));
past.node_idx = dlm->node_num;
@@ -441,7 +470,6 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
vec[0].iov_len = sizeof(struct dlm_proxy_ast);
vec[0].iov_base = &past;
if (flags & DLM_LKSB_GET_LVB) {
- mlog(0, "returning requested LVB data\n");
be32_add_cpu(&past.flags, LKM_GET_LVB);
vec[1].iov_len = DLM_LVB_LEN;
vec[1].iov_base = lock->lksb->lvb;
@@ -451,8 +479,8 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen,
lock->ml.node, &status);
if (ret < 0)
- mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
- "node %u\n", ret, DLM_PROXY_AST_MSG, dlm->key,
+ mlog(ML_ERROR, "%s: res %.*s, error %d send AST to node %u\n",
+ dlm->name, res->lockname.len, res->lockname.name, ret,
lock->ml.node);
else {
if (status == DLM_RECOVERING) {
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index b36d0bf..4bdf7ba 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -50,10 +50,10 @@
#define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l)
enum dlm_mle_type {
- DLM_MLE_BLOCK,
- DLM_MLE_MASTER,
- DLM_MLE_MIGRATION,
- DLM_MLE_NUM_TYPES
+ DLM_MLE_BLOCK = 0,
+ DLM_MLE_MASTER = 1,
+ DLM_MLE_MIGRATION = 2,
+ DLM_MLE_NUM_TYPES = 3,
};
struct dlm_master_list_entry {
@@ -82,8 +82,8 @@ struct dlm_master_list_entry {
enum dlm_ast_type {
DLM_AST = 0,
- DLM_BAST,
- DLM_ASTUNLOCK
+ DLM_BAST = 1,
+ DLM_ASTUNLOCK = 2,
};
@@ -119,9 +119,9 @@ struct dlm_recovery_ctxt
enum dlm_ctxt_state {
DLM_CTXT_NEW = 0,
- DLM_CTXT_JOINED,
- DLM_CTXT_IN_SHUTDOWN,
- DLM_CTXT_LEAVING,
+ DLM_CTXT_JOINED = 1,
+ DLM_CTXT_IN_SHUTDOWN = 2,
+ DLM_CTXT_LEAVING = 3,
};
struct dlm_ctxt
@@ -388,8 +388,8 @@ struct dlm_lock
enum dlm_lockres_list {
DLM_GRANTED_LIST = 0,
- DLM_CONVERTING_LIST,
- DLM_BLOCKED_LIST
+ DLM_CONVERTING_LIST = 1,
+ DLM_BLOCKED_LIST = 2,
};
static inline int dlm_lvb_is_empty(char *lvb)
@@ -427,27 +427,27 @@ struct dlm_node_iter
enum {
- DLM_MASTER_REQUEST_MSG = 500,
- DLM_UNUSED_MSG1, /* 501 */
- DLM_ASSERT_MASTER_MSG, /* 502 */
- DLM_CREATE_LOCK_MSG, /* 503 */
- DLM_CONVERT_LOCK_MSG, /* 504 */
- DLM_PROXY_AST_MSG, /* 505 */
- DLM_UNLOCK_LOCK_MSG, /* 506 */
- DLM_DEREF_LOCKRES_MSG, /* 507 */
- DLM_MIGRATE_REQUEST_MSG, /* 508 */
- DLM_MIG_LOCKRES_MSG, /* 509 */
- DLM_QUERY_JOIN_MSG, /* 510 */
- DLM_ASSERT_JOINED_MSG, /* 511 */
- DLM_CANCEL_JOIN_MSG, /* 512 */
- DLM_EXIT_DOMAIN_MSG, /* 513 */
- DLM_MASTER_REQUERY_MSG, /* 514 */
- DLM_LOCK_REQUEST_MSG, /* 515 */
- DLM_RECO_DATA_DONE_MSG, /* 516 */
- DLM_BEGIN_RECO_MSG, /* 517 */
- DLM_FINALIZE_RECO_MSG, /* 518 */
- DLM_QUERY_REGION, /* 519 */
- DLM_QUERY_NODEINFO, /* 520 */
+ DLM_MASTER_REQUEST_MSG = 500,
+ DLM_UNUSED_MSG1 = 501,
+ DLM_ASSERT_MASTER_MSG = 502,
+ DLM_CREATE_LOCK_MSG = 503,
+ DLM_CONVERT_LOCK_MSG = 504,
+ DLM_PROXY_AST_MSG = 505,
+ DLM_UNLOCK_LOCK_MSG = 506,
+ DLM_DEREF_LOCKRES_MSG = 507,
+ DLM_MIGRATE_REQUEST_MSG = 508,
+ DLM_MIG_LOCKRES_MSG = 509,
+ DLM_QUERY_JOIN_MSG = 510,
+ DLM_ASSERT_JOINED_MSG = 511,
+ DLM_CANCEL_JOIN_MSG = 512,
+ DLM_EXIT_DOMAIN_MSG = 513,
+ DLM_MASTER_REQUERY_MSG = 514,
+ DLM_LOCK_REQUEST_MSG = 515,
+ DLM_RECO_DATA_DONE_MSG = 516,
+ DLM_BEGIN_RECO_MSG = 517,
+ DLM_FINALIZE_RECO_MSG = 518,
+ DLM_QUERY_REGION = 519,
+ DLM_QUERY_NODEINFO = 520,
};
struct dlm_reco_node_data
@@ -460,19 +460,19 @@ struct dlm_reco_node_data
enum {
DLM_RECO_NODE_DATA_DEAD = -1,
DLM_RECO_NODE_DATA_INIT = 0,
- DLM_RECO_NODE_DATA_REQUESTING,
- DLM_RECO_NODE_DATA_REQUESTED,
- DLM_RECO_NODE_DATA_RECEIVING,
- DLM_RECO_NODE_DATA_DONE,
- DLM_RECO_NODE_DATA_FINALIZE_SENT,
+ DLM_RECO_NODE_DATA_REQUESTING = 1,
+ DLM_RECO_NODE_DATA_REQUESTED = 2,
+ DLM_RECO_NODE_DATA_RECEIVING = 3,
+ DLM_RECO_NODE_DATA_DONE = 4,
+ DLM_RECO_NODE_DATA_FINALIZE_SENT = 5,
};
enum {
DLM_MASTER_RESP_NO = 0,
- DLM_MASTER_RESP_YES,
- DLM_MASTER_RESP_MAYBE,
- DLM_MASTER_RESP_ERROR
+ DLM_MASTER_RESP_YES = 1,
+ DLM_MASTER_RESP_MAYBE = 2,
+ DLM_MASTER_RESP_ERROR = 3,
};
@@ -649,9 +649,9 @@ struct dlm_proxy_ast
#define DLM_MOD_KEY (0x666c6172)
enum dlm_query_join_response_code {
JOIN_DISALLOW = 0,
- JOIN_OK,
- JOIN_OK_NO_MAP,
- JOIN_PROTOCOL_MISMATCH,
+ JOIN_OK = 1,
+ JOIN_OK_NO_MAP = 2,
+ JOIN_PROTOCOL_MISMATCH = 3,
};
struct dlm_query_join_packet {
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 272ec86..04a32be 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -370,92 +370,46 @@ static void dlm_debug_get(struct dlm_debug_ctxt *dc)
kref_get(&dc->debug_refcnt);
}
-static struct debug_buffer *debug_buffer_allocate(void)
+static int debug_release(struct inode *inode, struct file *file)
{
- struct debug_buffer *db = NULL;
-
- db = kzalloc(sizeof(struct debug_buffer), GFP_KERNEL);
- if (!db)
- goto bail;
-
- db->len = PAGE_SIZE;
- db->buf = kmalloc(db->len, GFP_KERNEL);
- if (!db->buf)
- goto bail;
-
- return db;
-bail:
- kfree(db);
- return NULL;
-}
-
-static ssize_t debug_buffer_read(struct file *file, char __user *buf,
- size_t nbytes, loff_t *ppos)
-{
- struct debug_buffer *db = file->private_data;
-
- return simple_read_from_buffer(buf, nbytes, ppos, db->buf, db->len);
-}
-
-static loff_t debug_buffer_llseek(struct file *file, loff_t off, int whence)
-{
- struct debug_buffer *db = file->private_data;
- loff_t new = -1;
-
- switch (whence) {
- case 0:
- new = off;
- break;
- case 1:
- new = file->f_pos + off;
- break;
- }
-
- if (new < 0 || new > db->len)
- return -EINVAL;
-
- return (file->f_pos = new);
+ free_page((unsigned long)file->private_data);
+ return 0;
}
-static int debug_buffer_release(struct inode *inode, struct file *file)
+static ssize_t debug_read(struct file *file, char __user *buf,
+ size_t nbytes, loff_t *ppos)
{
- struct debug_buffer *db = file->private_data;
-
- if (db)
- kfree(db->buf);
- kfree(db);
-
- return 0;
+ return simple_read_from_buffer(buf, nbytes, ppos, file->private_data,
+ i_size_read(file->f_mapping->host));
}
/* end - util funcs */
/* begin - purge list funcs */
-static int debug_purgelist_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
+static int debug_purgelist_print(struct dlm_ctxt *dlm, char *buf, int len)
{
struct dlm_lock_resource *res;
int out = 0;
unsigned long total = 0;
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Dumping Purgelist for Domain: %s\n", dlm->name);
spin_lock(&dlm->spinlock);
list_for_each_entry(res, &dlm->purge_list, purge) {
++total;
- if (db->len - out < 100)
+ if (len - out < 100)
continue;
spin_lock(&res->spinlock);
out += stringify_lockname(res->lockname.name,
res->lockname.len,
- db->buf + out, db->len - out);
- out += snprintf(db->buf + out, db->len - out, "\t%ld\n",
+ buf + out, len - out);
+ out += snprintf(buf + out, len - out, "\t%ld\n",
(jiffies - res->last_used)/HZ);
spin_unlock(&res->spinlock);
}
spin_unlock(&dlm->spinlock);
- out += snprintf(db->buf + out, db->len - out,
- "Total on list: %ld\n", total);
+ out += snprintf(buf + out, len - out, "Total on list: %ld\n", total);
return out;
}
@@ -463,15 +417,15 @@ static int debug_purgelist_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
static int debug_purgelist_open(struct inode *inode, struct file *file)
{
struct dlm_ctxt *dlm = inode->i_private;
- struct debug_buffer *db;
+ char *buf = NULL;
- db = debug_buffer_allocate();
- if (!db)
+ buf = (char *) get_zeroed_page(GFP_NOFS);
+ if (!buf)
goto bail;
- db->len = debug_purgelist_print(dlm, db);
+ i_size_write(inode, debug_purgelist_print(dlm, buf, PAGE_SIZE - 1));
- file->private_data = db;
+ file->private_data = buf;
return 0;
bail:
@@ -480,14 +434,14 @@ bail:
static const struct file_operations debug_purgelist_fops = {
.open = debug_purgelist_open,
- .release = debug_buffer_release,
- .read = debug_buffer_read,
- .llseek = debug_buffer_llseek,
+ .release = debug_release,
+ .read = debug_read,
+ .llseek = generic_file_llseek,
};
/* end - purge list funcs */
/* begin - debug mle funcs */
-static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
+static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len)
{
struct dlm_master_list_entry *mle;
struct hlist_head *bucket;
@@ -495,7 +449,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
int i, out = 0;
unsigned long total = 0, longest = 0, bucket_count = 0;
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Dumping MLEs for Domain: %s\n", dlm->name);
spin_lock(&dlm->master_lock);
@@ -506,16 +460,16 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
master_hash_node);
++total;
++bucket_count;
- if (db->len - out < 200)
+ if (len - out < 200)
continue;
- out += dump_mle(mle, db->buf + out, db->len - out);
+ out += dump_mle(mle, buf + out, len - out);
}
longest = max(longest, bucket_count);
bucket_count = 0;
}
spin_unlock(&dlm->master_lock);
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Total: %ld, Longest: %ld\n", total, longest);
return out;
}
@@ -523,15 +477,15 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
static int debug_mle_open(struct inode *inode, struct file *file)
{
struct dlm_ctxt *dlm = inode->i_private;
- struct debug_buffer *db;
+ char *buf = NULL;
- db = debug_buffer_allocate();
- if (!db)
+ buf = (char *) get_zeroed_page(GFP_NOFS);
+ if (!buf)
goto bail;
- db->len = debug_mle_print(dlm, db);
+ i_size_write(inode, debug_mle_print(dlm, buf, PAGE_SIZE - 1));
- file->private_data = db;
+ file->private_data = buf;
return 0;
bail:
@@ -540,9 +494,9 @@ bail:
static const struct file_operations debug_mle_fops = {
.open = debug_mle_open,
- .release = debug_buffer_release,
- .read = debug_buffer_read,
- .llseek = debug_buffer_llseek,
+ .release = debug_release,
+ .read = debug_read,
+ .llseek = generic_file_llseek,
};
/* end - debug mle funcs */
@@ -757,7 +711,7 @@ static const struct file_operations debug_lockres_fops = {
/* end - debug lockres funcs */
/* begin - debug state funcs */
-static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
+static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len)
{
int out = 0;
struct dlm_reco_node_data *node;
@@ -781,35 +735,35 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
}
/* Domain: xxxxxxxxxx Key: 0xdfbac769 */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Domain: %s Key: 0x%08x Protocol: %d.%d\n",
dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major,
dlm->dlm_locking_proto.pv_minor);
/* Thread Pid: xxx Node: xxx State: xxxxx */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Thread Pid: %d Node: %d State: %s\n",
- dlm->dlm_thread_task->pid, dlm->node_num, state);
+ task_pid_nr(dlm->dlm_thread_task), dlm->node_num, state);
/* Number of Joins: xxx Joining Node: xxx */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Number of Joins: %d Joining Node: %d\n",
dlm->num_joins, dlm->joining_node);
/* Domain Map: xx xx xx */
- out += snprintf(db->buf + out, db->len - out, "Domain Map: ");
+ out += snprintf(buf + out, len - out, "Domain Map: ");
out += stringify_nodemap(dlm->domain_map, O2NM_MAX_NODES,
- db->buf + out, db->len - out);
- out += snprintf(db->buf + out, db->len - out, "\n");
+ buf + out, len - out);
+ out += snprintf(buf + out, len - out, "\n");
/* Live Map: xx xx xx */
- out += snprintf(db->buf + out, db->len - out, "Live Map: ");
+ out += snprintf(buf + out, len - out, "Live Map: ");
out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES,
- db->buf + out, db->len - out);
- out += snprintf(db->buf + out, db->len - out, "\n");
+ buf + out, len - out);
+ out += snprintf(buf + out, len - out, "\n");
/* Lock Resources: xxx (xxx) */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Lock Resources: %d (%d)\n",
atomic_read(&dlm->res_cur_count),
atomic_read(&dlm->res_tot_count));
@@ -821,29 +775,29 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
cur_mles += atomic_read(&dlm->mle_cur_count[i]);
/* MLEs: xxx (xxx) */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"MLEs: %d (%d)\n", cur_mles, tot_mles);
/* Blocking: xxx (xxx) */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
" Blocking: %d (%d)\n",
atomic_read(&dlm->mle_cur_count[DLM_MLE_BLOCK]),
atomic_read(&dlm->mle_tot_count[DLM_MLE_BLOCK]));
/* Mastery: xxx (xxx) */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
" Mastery: %d (%d)\n",
atomic_read(&dlm->mle_cur_count[DLM_MLE_MASTER]),
atomic_read(&dlm->mle_tot_count[DLM_MLE_MASTER]));
/* Migration: xxx (xxx) */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
" Migration: %d (%d)\n",
atomic_read(&dlm->mle_cur_count[DLM_MLE_MIGRATION]),
atomic_read(&dlm->mle_tot_count[DLM_MLE_MIGRATION]));
/* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Lists: Dirty=%s Purge=%s PendingASTs=%s "
"PendingBASTs=%s\n",
(list_empty(&dlm->dirty_list) ? "Empty" : "InUse"),
@@ -852,12 +806,12 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
(list_empty(&dlm->pending_basts) ? "Empty" : "InUse"));
/* Purge Count: xxx Refs: xxx */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Purge Count: %d Refs: %d\n", dlm->purge_count,
atomic_read(&dlm->dlm_refs.refcount));
/* Dead Node: xxx */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Dead Node: %d\n", dlm->reco.dead_node);
/* What about DLM_RECO_STATE_FINALIZE? */
@@ -867,19 +821,19 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
state = "INACTIVE";
/* Recovery Pid: xxxx Master: xxx State: xxxx */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Recovery Pid: %d Master: %d State: %s\n",
- dlm->dlm_reco_thread_task->pid,
+ task_pid_nr(dlm->dlm_reco_thread_task),
dlm->reco.new_master, state);
/* Recovery Map: xx xx */
- out += snprintf(db->buf + out, db->len - out, "Recovery Map: ");
+ out += snprintf(buf + out, len - out, "Recovery Map: ");
out += stringify_nodemap(dlm->recovery_map, O2NM_MAX_NODES,
- db->buf + out, db->len - out);
- out += snprintf(db->buf + out, db->len - out, "\n");
+ buf + out, len - out);
+ out += snprintf(buf + out, len - out, "\n");
/* Recovery Node State: */
- out += snprintf(db->buf + out, db->len - out, "Recovery Node State:\n");
+ out += snprintf(buf + out, len - out, "Recovery Node State:\n");
list_for_each_entry(node, &dlm->reco.node_data, list) {
switch (node->state) {
case DLM_RECO_NODE_DATA_INIT:
@@ -907,7 +861,7 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
state = "BAD";
break;
}
- out += snprintf(db->buf + out, db->len - out, "\t%u - %s\n",
+ out += snprintf(buf + out, len - out, "\t%u - %s\n",
node->node_num, state);
}
@@ -919,15 +873,15 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
static int debug_state_open(struct inode *inode, struct file *file)
{
struct dlm_ctxt *dlm = inode->i_private;
- struct debug_buffer *db = NULL;
+ char *buf = NULL;
- db = debug_buffer_allocate();
- if (!db)
+ buf = (char *) get_zeroed_page(GFP_NOFS);
+ if (!buf)
goto bail;
- db->len = debug_state_print(dlm, db);
+ i_size_write(inode, debug_state_print(dlm, buf, PAGE_SIZE - 1));
- file->private_data = db;
+ file->private_data = buf;
return 0;
bail:
@@ -936,9 +890,9 @@ bail:
static const struct file_operations debug_state_fops = {
.open = debug_state_open,
- .release = debug_buffer_release,
- .read = debug_buffer_read,
- .llseek = debug_buffer_llseek,
+ .release = debug_release,
+ .read = debug_read,
+ .llseek = generic_file_llseek,
};
/* end - debug state funcs */
@@ -1002,14 +956,10 @@ void dlm_debug_shutdown(struct dlm_ctxt *dlm)
struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
if (dc) {
- if (dc->debug_purgelist_dentry)
- debugfs_remove(dc->debug_purgelist_dentry);
- if (dc->debug_mle_dentry)
- debugfs_remove(dc->debug_mle_dentry);
- if (dc->debug_lockres_dentry)
- debugfs_remove(dc->debug_lockres_dentry);
- if (dc->debug_state_dentry)
- debugfs_remove(dc->debug_state_dentry);
+ debugfs_remove(dc->debug_purgelist_dentry);
+ debugfs_remove(dc->debug_mle_dentry);
+ debugfs_remove(dc->debug_lockres_dentry);
+ debugfs_remove(dc->debug_state_dentry);
dlm_debug_put(dc);
}
}
@@ -1040,8 +990,7 @@ bail:
void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
{
- if (dlm->dlm_debugfs_subroot)
- debugfs_remove(dlm->dlm_debugfs_subroot);
+ debugfs_remove(dlm->dlm_debugfs_subroot);
}
/* debugfs root */
@@ -1057,7 +1006,6 @@ int dlm_create_debugfs_root(void)
void dlm_destroy_debugfs_root(void)
{
- if (dlm_debugfs_root)
- debugfs_remove(dlm_debugfs_root);
+ debugfs_remove(dlm_debugfs_root);
}
#endif /* CONFIG_DEBUG_FS */
diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h
index 8c686d2..1f27c48 100644
--- a/fs/ocfs2/dlm/dlmdebug.h
+++ b/fs/ocfs2/dlm/dlmdebug.h
@@ -37,11 +37,6 @@ struct dlm_debug_ctxt {
struct dentry *debug_purgelist_dentry;
};
-struct debug_buffer {
- int len;
- char *buf;
-};
-
struct debug_lockres {
int dl_len;
char *dl_buf;
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 58a93b9..7e38a07 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -460,8 +460,6 @@ redo_bucket:
}
cond_resched_lock(&dlm->spinlock);
num += n;
- mlog(0, "%s: touched %d lockreses in bucket %d "
- "(tot=%d)\n", dlm->name, n, i, num);
}
spin_unlock(&dlm->spinlock);
wake_up(&dlm->dlm_thread_wq);
@@ -959,7 +957,7 @@ static int dlm_match_regions(struct dlm_ctxt *dlm,
r += O2HB_MAX_REGION_NAME_LEN;
}
- local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL);
+ local = kmalloc(sizeof(qr->qr_regions), GFP_ATOMIC);
if (!local) {
status = -ENOMEM;
goto bail;
@@ -1661,8 +1659,8 @@ bail:
static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm)
{
- o2hb_unregister_callback(NULL, &dlm->dlm_hb_up);
- o2hb_unregister_callback(NULL, &dlm->dlm_hb_down);
+ o2hb_unregister_callback(dlm->name, &dlm->dlm_hb_up);
+ o2hb_unregister_callback(dlm->name, &dlm->dlm_hb_down);
o2net_unregister_handler_list(&dlm->dlm_domain_handlers);
}
@@ -1674,13 +1672,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)
o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB,
dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI);
- status = o2hb_register_callback(NULL, &dlm->dlm_hb_down);
+ status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_down);
if (status)
goto bail;
o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB,
dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI);
- status = o2hb_register_callback(NULL, &dlm->dlm_hb_up);
+ status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_up);
if (status)
goto bail;
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 69cf369..7009292 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -106,6 +106,9 @@ static int dlm_can_grant_new_lock(struct dlm_lock_resource *res,
if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type))
return 0;
+ if (!dlm_lock_compatible(tmplock->ml.convert_type,
+ lock->ml.type))
+ return 0;
}
return 1;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index f564b0e..59f0f6b 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2346,7 +2346,8 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
*/
static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res,
- int *numlocks)
+ int *numlocks,
+ int *hasrefs)
{
int ret;
int i;
@@ -2356,6 +2357,9 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
assert_spin_locked(&res->spinlock);
+ *numlocks = 0;
+ *hasrefs = 0;
+
ret = -EINVAL;
if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
mlog(0, "cannot migrate lockres with unknown owner!\n");
@@ -2386,7 +2390,13 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
}
*numlocks = count;
- mlog(0, "migrateable lockres having %d locks\n", *numlocks);
+
+ count = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
+ if (count < O2NM_MAX_NODES)
+ *hasrefs = 1;
+
+ mlog(0, "%s: res %.*s, Migrateable, locks %d, refs %d\n", dlm->name,
+ res->lockname.len, res->lockname.name, *numlocks, *hasrefs);
leave:
return ret;
@@ -2408,7 +2418,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
const char *name;
unsigned int namelen;
int mle_added = 0;
- int numlocks;
+ int numlocks, hasrefs;
int wake = 0;
if (!dlm_grab(dlm))
@@ -2417,13 +2427,13 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
name = res->lockname.name;
namelen = res->lockname.len;
- mlog(0, "migrating %.*s to %u\n", namelen, name, target);
+ mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target);
/*
* ensure this lockres is a proper candidate for migration
*/
spin_lock(&res->spinlock);
- ret = dlm_is_lockres_migrateable(dlm, res, &numlocks);
+ ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
if (ret < 0) {
spin_unlock(&res->spinlock);
goto leave;
@@ -2431,10 +2441,8 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
spin_unlock(&res->spinlock);
/* no work to do */
- if (numlocks == 0) {
- mlog(0, "no locks were found on this lockres! done!\n");
+ if (numlocks == 0 && !hasrefs)
goto leave;
- }
/*
* preallocate up front
@@ -2459,14 +2467,14 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
* find a node to migrate the lockres to
*/
- mlog(0, "picking a migration node\n");
spin_lock(&dlm->spinlock);
/* pick a new node */
if (!test_bit(target, dlm->domain_map) ||
target >= O2NM_MAX_NODES) {
target = dlm_pick_migration_target(dlm, res);
}
- mlog(0, "node %u chosen for migration\n", target);
+ mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name,
+ namelen, name, target);
if (target >= O2NM_MAX_NODES ||
!test_bit(target, dlm->domain_map)) {
@@ -2667,7 +2675,7 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
{
int ret;
int lock_dropped = 0;
- int numlocks;
+ int numlocks, hasrefs;
spin_lock(&res->spinlock);
if (res->owner != dlm->node_num) {
@@ -2681,8 +2689,8 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
}
/* No need to migrate a lockres having no locks */
- ret = dlm_is_lockres_migrateable(dlm, res, &numlocks);
- if (ret >= 0 && numlocks == 0) {
+ ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
+ if (ret >= 0 && numlocks == 0 && !hasrefs) {
spin_unlock(&res->spinlock);
goto leave;
}
@@ -2915,6 +2923,12 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
}
queue++;
}
+
+ nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
+ if (nodenum < O2NM_MAX_NODES) {
+ spin_unlock(&res->spinlock);
+ return nodenum;
+ }
spin_unlock(&res->spinlock);
mlog(0, "have not found a suitable target yet! checking domain map\n");
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 2211acf..1d6d1d2 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -122,15 +122,13 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res)
void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
- mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
-
assert_spin_locked(&dlm->spinlock);
assert_spin_locked(&res->spinlock);
if (__dlm_lockres_unused(res)){
if (list_empty(&res->purge)) {
- mlog(0, "putting lockres %.*s:%p onto purge list\n",
- res->lockname.len, res->lockname.name, res);
+ mlog(0, "%s: Adding res %.*s to purge list\n",
+ dlm->name, res->lockname.len, res->lockname.name);
res->last_used = jiffies;
dlm_lockres_get(res);
@@ -138,8 +136,8 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
dlm->purge_count++;
}
} else if (!list_empty(&res->purge)) {
- mlog(0, "removing lockres %.*s:%p from purge list, owner=%u\n",
- res->lockname.len, res->lockname.name, res, res->owner);
+ mlog(0, "%s: Removing res %.*s from purge list\n",
+ dlm->name, res->lockname.len, res->lockname.name);
list_del_init(&res->purge);
dlm_lockres_put(res);
@@ -150,7 +148,6 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
- mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
spin_lock(&dlm->spinlock);
spin_lock(&res->spinlock);
@@ -171,9 +168,8 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,
master = (res->owner == dlm->node_num);
-
- mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
- res->lockname.name, master);
+ mlog(0, "%s: Purging res %.*s, master %d\n", dlm->name,
+ res->lockname.len, res->lockname.name, master);
if (!master) {
res->state |= DLM_LOCK_RES_DROPPING_REF;
@@ -189,27 +185,25 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,
/* clear our bit from the master's refmap, ignore errors */
ret = dlm_drop_lockres_ref(dlm, res);
if (ret < 0) {
- mlog_errno(ret);
+ mlog(ML_ERROR, "%s: deref %.*s failed %d\n", dlm->name,
+ res->lockname.len, res->lockname.name, ret);
if (!dlm_is_host_down(ret))
BUG();
}
- mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
- dlm->name, res->lockname.len, res->lockname.name, ret);
spin_lock(&dlm->spinlock);
spin_lock(&res->spinlock);
}
if (!list_empty(&res->purge)) {
- mlog(0, "removing lockres %.*s:%p from purgelist, "
- "master = %d\n", res->lockname.len, res->lockname.name,
- res, master);
+ mlog(0, "%s: Removing res %.*s from purgelist, master %d\n",
+ dlm->name, res->lockname.len, res->lockname.name, master);
list_del_init(&res->purge);
dlm_lockres_put(res);
dlm->purge_count--;
}
if (!__dlm_lockres_unused(res)) {
- mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
+ mlog(ML_ERROR, "%s: res %.*s in use after deref\n",
dlm->name, res->lockname.len, res->lockname.name);
__dlm_print_one_lock_resource(res);
BUG();
@@ -266,10 +260,10 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
unused = __dlm_lockres_unused(lockres);
if (!unused ||
(lockres->state & DLM_LOCK_RES_MIGRATING)) {
- mlog(0, "lockres %s:%.*s: is in use or "
- "being remastered, used %d, state %d\n",
- dlm->name, lockres->lockname.len,
- lockres->lockname.name, !unused, lockres->state);
+ mlog(0, "%s: res %.*s is in use or being remastered, "
+ "used %d, state %d\n", dlm->name,
+ lockres->lockname.len, lockres->lockname.name,
+ !unused, lockres->state);
list_move_tail(&dlm->purge_list, &lockres->purge);
spin_unlock(&lockres->spinlock);
continue;
@@ -296,15 +290,12 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
struct list_head *head;
int can_grant = 1;
- //mlog(0, "res->lockname.len=%d\n", res->lockname.len);
- //mlog(0, "res->lockname.name=%p\n", res->lockname.name);
- //mlog(0, "shuffle res %.*s\n", res->lockname.len,
- // res->lockname.name);
-
- /* because this function is called with the lockres
+ /*
+ * Because this function is called with the lockres
* spinlock, and because we know that it is not migrating/
* recovering/in-progress, it is fine to reserve asts and
- * basts right before queueing them all throughout */
+ * basts right before queueing them all throughout
+ */
assert_spin_locked(&dlm->ast_lock);
assert_spin_locked(&res->spinlock);
BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING|
@@ -314,13 +305,13 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
converting:
if (list_empty(&res->converting))
goto blocked;
- mlog(0, "res %.*s has locks on a convert queue\n", res->lockname.len,
- res->lockname.name);
+ mlog(0, "%s: res %.*s has locks on the convert queue\n", dlm->name,
+ res->lockname.len, res->lockname.name);
target = list_entry(res->converting.next, struct dlm_lock, list);
if (target->ml.convert_type == LKM_IVMODE) {
- mlog(ML_ERROR, "%.*s: converting a lock with no "
- "convert_type!\n", res->lockname.len, res->lockname.name);
+ mlog(ML_ERROR, "%s: res %.*s converting lock to invalid mode\n",
+ dlm->name, res->lockname.len, res->lockname.name);
BUG();
}
head = &res->granted;
@@ -365,9 +356,12 @@ converting:
spin_lock(&target->spinlock);
BUG_ON(target->ml.highest_blocked != LKM_IVMODE);
- mlog(0, "calling ast for converting lock: %.*s, have: %d, "
- "granting: %d, node: %u\n", res->lockname.len,
- res->lockname.name, target->ml.type,
+ mlog(0, "%s: res %.*s, AST for Converting lock %u:%llu, type "
+ "%d => %d, node %u\n", dlm->name, res->lockname.len,
+ res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)),
+ target->ml.type,
target->ml.convert_type, target->ml.node);
target->ml.type = target->ml.convert_type;
@@ -428,11 +422,14 @@ blocked:
spin_lock(&target->spinlock);
BUG_ON(target->ml.highest_blocked != LKM_IVMODE);
- mlog(0, "calling ast for blocked lock: %.*s, granting: %d, "
- "node: %u\n", res->lockname.len, res->lockname.name,
+ mlog(0, "%s: res %.*s, AST for Blocked lock %u:%llu, type %d, "
+ "node %u\n", dlm->name, res->lockname.len,
+ res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)),
target->ml.type, target->ml.node);
- // target->ml.type is already correct
+ /* target->ml.type is already correct */
list_move_tail(&target->list, &res->granted);
BUG_ON(!target->lksb);
@@ -453,7 +450,6 @@ leave:
/* must have NO locks when calling this with res !=NULL * */
void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
{
- mlog_entry("dlm=%p, res=%p\n", dlm, res);
if (res) {
spin_lock(&dlm->spinlock);
spin_lock(&res->spinlock);
@@ -466,8 +462,6 @@ void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
{
- mlog_entry("dlm=%p, res=%p\n", dlm, res);
-
assert_spin_locked(&dlm->spinlock);
assert_spin_locked(&res->spinlock);
@@ -484,13 +478,16 @@ void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
res->state |= DLM_LOCK_RES_DIRTY;
}
}
+
+ mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len,
+ res->lockname.name);
}
/* Launch the NM thread for the mounted volume */
int dlm_launch_thread(struct dlm_ctxt *dlm)
{
- mlog(0, "starting dlm thread...\n");
+ mlog(0, "Starting dlm_thread...\n");
dlm->dlm_thread_task = kthread_run(dlm_thread, dlm, "dlm_thread");
if (IS_ERR(dlm->dlm_thread_task)) {
@@ -505,7 +502,7 @@ int dlm_launch_thread(struct dlm_ctxt *dlm)
void dlm_complete_thread(struct dlm_ctxt *dlm)
{
if (dlm->dlm_thread_task) {
- mlog(ML_KTHREAD, "waiting for dlm thread to exit\n");
+ mlog(ML_KTHREAD, "Waiting for dlm thread to exit\n");
kthread_stop(dlm->dlm_thread_task);
dlm->dlm_thread_task = NULL;
}
@@ -536,7 +533,12 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
/* get an extra ref on lock */
dlm_lock_get(lock);
res = lock->lockres;
- mlog(0, "delivering an ast for this lockres\n");
+ mlog(0, "%s: res %.*s, Flush AST for lock %u:%llu, type %d, "
+ "node %u\n", dlm->name, res->lockname.len,
+ res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
+ lock->ml.type, lock->ml.node);
BUG_ON(!lock->ast_pending);
@@ -557,9 +559,9 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
/* possible that another ast was queued while
* we were delivering the last one */
if (!list_empty(&lock->ast_list)) {
- mlog(0, "aha another ast got queued while "
- "we were finishing the last one. will "
- "keep the ast_pending flag set.\n");
+ mlog(0, "%s: res %.*s, AST queued while flushing last "
+ "one\n", dlm->name, res->lockname.len,
+ res->lockname.name);
} else
lock->ast_pending = 0;
@@ -590,8 +592,12 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
dlm_lock_put(lock);
spin_unlock(&dlm->ast_lock);
- mlog(0, "delivering a bast for this lockres "
- "(blocked = %d\n", hi);
+ mlog(0, "%s: res %.*s, Flush BAST for lock %u:%llu, "
+ "blocked %d, node %u\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
+ hi, lock->ml.node);
if (lock->ml.node != dlm->node_num) {
ret = dlm_send_proxy_bast(dlm, res, lock, hi);
@@ -605,9 +611,9 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
/* possible that another bast was queued while
* we were delivering the last one */
if (!list_empty(&lock->bast_list)) {
- mlog(0, "aha another bast got queued while "
- "we were finishing the last one. will "
- "keep the bast_pending flag set.\n");
+ mlog(0, "%s: res %.*s, BAST queued while flushing last "
+ "one\n", dlm->name, res->lockname.len,
+ res->lockname.name);
} else
lock->bast_pending = 0;
@@ -675,11 +681,12 @@ static int dlm_thread(void *data)
spin_lock(&res->spinlock);
if (res->owner != dlm->node_num) {
__dlm_print_one_lock_resource(res);
- mlog(ML_ERROR, "inprog:%s, mig:%s, reco:%s, dirty:%s\n",
- res->state & DLM_LOCK_RES_IN_PROGRESS ? "yes" : "no",
- res->state & DLM_LOCK_RES_MIGRATING ? "yes" : "no",
- res->state & DLM_LOCK_RES_RECOVERING ? "yes" : "no",
- res->state & DLM_LOCK_RES_DIRTY ? "yes" : "no");
+ mlog(ML_ERROR, "%s: inprog %d, mig %d, reco %d,"
+ " dirty %d\n", dlm->name,
+ !!(res->state & DLM_LOCK_RES_IN_PROGRESS),
+ !!(res->state & DLM_LOCK_RES_MIGRATING),
+ !!(res->state & DLM_LOCK_RES_RECOVERING),
+ !!(res->state & DLM_LOCK_RES_DIRTY));
}
BUG_ON(res->owner != dlm->node_num);
@@ -693,8 +700,8 @@ static int dlm_thread(void *data)
res->state &= ~DLM_LOCK_RES_DIRTY;
spin_unlock(&res->spinlock);
spin_unlock(&dlm->ast_lock);
- mlog(0, "delaying list shuffling for in-"
- "progress lockres %.*s, state=%d\n",
+ mlog(0, "%s: res %.*s, inprogress, delay list "
+ "shuffle, state %d\n", dlm->name,
res->lockname.len, res->lockname.name,
res->state);
delay = 1;
@@ -706,10 +713,6 @@ static int dlm_thread(void *data)
* spinlock and do NOT have the dlm lock.
* safe to reserve/queue asts and run the lists. */
- mlog(0, "calling dlm_shuffle_lists with dlm=%s, "
- "res=%.*s\n", dlm->name,
- res->lockname.len, res->lockname.name);
-
/* called while holding lockres lock */
dlm_shuffle_lists(dlm, res);
res->state &= ~DLM_LOCK_RES_DIRTY;
@@ -733,7 +736,8 @@ in_progress:
/* unlikely, but we may need to give time to
* other tasks */
if (!--n) {
- mlog(0, "throttling dlm_thread\n");
+ mlog(0, "%s: Throttling dlm thread\n",
+ dlm->name);
break;
}
}
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 75e115f..8c5c0ed 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -351,11 +351,18 @@ static struct inode *dlmfs_alloc_inode(struct super_block *sb)
return &ip->ip_vfs_inode;
}
-static void dlmfs_destroy_inode(struct inode *inode)
+static void dlmfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
}
+static void dlmfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, dlmfs_i_callback);
+}
+
static void dlmfs_evict_inode(struct inode *inode)
{
int status;
@@ -643,16 +650,16 @@ static const struct inode_operations dlmfs_file_inode_operations = {
.setattr = dlmfs_file_setattr,
};
-static int dlmfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *dlmfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt);
+ return mount_nodev(fs_type, flags, data, dlmfs_fill_super);
}
static struct file_system_type dlmfs_fs_type = {
.owner = THIS_MODULE,
.name = "ocfs2_dlmfs",
- .get_sb = dlmfs_get_sb,
+ .mount = dlmfs_mount,
.kill_sb = kill_litter_super,
};
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 19ad145..6adafa5 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -138,7 +138,7 @@ check_gen:
result = d_obtain_alias(inode);
if (!IS_ERR(result))
- result->d_op = &ocfs2_dentry_ops;
+ d_set_d_op(result, &ocfs2_dentry_ops);
else
mlog_errno(PTR_ERR(result));
@@ -176,7 +176,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0));
if (!IS_ERR(parent))
- parent->d_op = &ocfs2_dentry_ops;
+ d_set_d_op(parent, &ocfs2_dentry_ops);
bail_unlock:
ocfs2_inode_unlock(dir, 0);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 77b4c04..bdadbae0 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1307,10 +1307,13 @@ bail:
return err;
}
-int ocfs2_permission(struct inode *inode, int mask)
+int ocfs2_permission(struct inode *inode, int mask, unsigned int flags)
{
int ret;
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
mlog_entry_void();
ret = ocfs2_inode_lock(inode, NULL, 0);
@@ -1320,7 +1323,7 @@ int ocfs2_permission(struct inode *inode, int mask)
goto out;
}
- ret = generic_permission(inode, mask, ocfs2_check_acl);
+ ret = generic_permission(inode, mask, flags, ocfs2_check_acl);
ocfs2_inode_unlock(inode, 0);
out:
@@ -2241,11 +2244,15 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
mutex_lock(&inode->i_mutex);
+ ocfs2_iocb_clear_sem_locked(iocb);
+
relock:
/* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
if (direct_io) {
down_read(&inode->i_alloc_sem);
have_alloc_sem = 1;
+ /* communicate with ocfs2_dio_end_io */
+ ocfs2_iocb_set_sem_locked(iocb);
}
/*
@@ -2382,8 +2389,10 @@ out:
ocfs2_rw_unlock(inode, rw_level);
out_sems:
- if (have_alloc_sem)
+ if (have_alloc_sem) {
up_read(&inode->i_alloc_sem);
+ ocfs2_iocb_clear_sem_locked(iocb);
+ }
mutex_unlock(&inode->i_mutex);
@@ -2527,6 +2536,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
goto bail;
}
+ ocfs2_iocb_clear_sem_locked(iocb);
+
/*
* buffered reads protect themselves in ->readpage(). O_DIRECT reads
* need locks to protect pending reads from racing with truncate.
@@ -2534,6 +2545,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
if (filp->f_flags & O_DIRECT) {
down_read(&inode->i_alloc_sem);
have_alloc_sem = 1;
+ ocfs2_iocb_set_sem_locked(iocb);
ret = ocfs2_rw_lock(inode, 0);
if (ret < 0) {
@@ -2575,8 +2587,10 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
}
bail:
- if (have_alloc_sem)
+ if (have_alloc_sem) {
up_read(&inode->i_alloc_sem);
+ ocfs2_iocb_clear_sem_locked(iocb);
+ }
if (rw_level != -1)
ocfs2_rw_unlock(inode, rw_level);
mlog_exit(ret);
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 97bf761..f5afbbe 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -61,7 +61,7 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
-int ocfs2_permission(struct inode *inode, int mask);
+int ocfs2_permission(struct inode *inode, int mask, unsigned int flags);
int ocfs2_should_update_atime(struct inode *inode,
struct vfsmount *vfsmnt);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index f935fd6..4068c6c 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -434,7 +434,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
* #1 and #2 can be simply solved by never taking the lock
* here for system files (which are the only type we read
* during mount). It's a heavier approach, but our main
- * concern is user-accesible files anyway.
+ * concern is user-accessible files anyway.
*
* #3 works itself out because we'll eventually take the
* cluster lock before trusting anything anyway.
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index ff5744e..30c5231 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -147,7 +147,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
spin_unlock(&oi->ip_lock);
bail_add:
- dentry->d_op = &ocfs2_dentry_ops;
+ d_set_d_op(dentry, &ocfs2_dentry_ops);
ret = d_splice_alias(inode, dentry);
if (inode) {
@@ -415,7 +415,7 @@ static int ocfs2_mknod(struct inode *dir,
mlog_errno(status);
goto leave;
}
- dentry->d_op = &ocfs2_dentry_ops;
+ d_set_d_op(dentry, &ocfs2_dentry_ops);
status = ocfs2_add_entry(handle, dentry, inode,
OCFS2_I(inode)->ip_blkno, parent_fe_bh,
@@ -743,7 +743,7 @@ static int ocfs2_link(struct dentry *old_dentry,
}
ihold(inode);
- dentry->d_op = &ocfs2_dentry_ops;
+ d_set_d_op(dentry, &ocfs2_dentry_ops);
d_instantiate(dentry, inode);
out_commit:
@@ -1017,8 +1017,11 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
* An error return must mean that no cluster locks
* were held on function exit.
*/
- if (oi1->ip_blkno != oi2->ip_blkno)
+ if (oi1->ip_blkno != oi2->ip_blkno) {
ocfs2_inode_unlock(inode2, 1);
+ brelse(*bh2);
+ *bh2 = NULL;
+ }
if (status != -ENOENT)
mlog_errno(status);
@@ -1794,7 +1797,7 @@ static int ocfs2_symlink(struct inode *dir,
mlog_errno(status);
goto bail;
}
- dentry->d_op = &ocfs2_dentry_ops;
+ d_set_d_op(dentry, &ocfs2_dentry_ops);
status = ocfs2_add_entry(handle, dentry, inode,
le64_to_cpu(fe->i_blkno), parent_fe_bh,
@@ -2459,7 +2462,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
goto out_commit;
}
- dentry->d_op = &ocfs2_dentry_ops;
+ d_set_d_op(dentry, &ocfs2_dentry_ops);
d_instantiate(dentry, inode);
status = 0;
out_commit:
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index d840821..51cd689 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -159,7 +159,9 @@ struct ocfs2_lock_res {
char l_name[OCFS2_LOCK_ID_MAX_LEN];
unsigned int l_ro_holders;
unsigned int l_ex_holders;
- unsigned char l_level;
+ signed char l_level;
+ signed char l_requested;
+ signed char l_blocking;
/* Data packed - type enum ocfs2_lock_type */
unsigned char l_type;
@@ -169,8 +171,6 @@ struct ocfs2_lock_res {
unsigned char l_action;
/* Data packed - enum type ocfs2_unlock_action */
unsigned char l_unlock_action;
- unsigned char l_requested;
- unsigned char l_blocking;
unsigned int l_pending_gen;
spinlock_t l_lock;
@@ -420,6 +420,11 @@ struct ocfs2_super
struct inode *osb_tl_inode;
struct buffer_head *osb_tl_bh;
struct delayed_work osb_truncate_log_wq;
+ /*
+ * How many clusters in our truncate log.
+ * It must be protected by osb_tl_inode->i_mutex.
+ */
+ unsigned int truncated_clusters;
struct ocfs2_node_map osb_recovering_orphan_dirs;
unsigned int *osb_orphan_wipes;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c2e4f82..bf2e776 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -350,7 +350,7 @@ enum {
#define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE
NUM_SYSTEM_INODES
};
-#define NUM_GLOBAL_SYSTEM_INODES OCFS2_LAST_GLOBAL_SYSTEM_INODE
+#define NUM_GLOBAL_SYSTEM_INODES OCFS2_FIRST_LOCAL_SYSTEM_INODE
#define NUM_LOCAL_SYSTEM_INODES \
(NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE)
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 252e7c8..a5ebe42 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -190,7 +190,7 @@ static struct ocfs2_live_connection *ocfs2_connection_find(const char *name)
return c;
}
- return c;
+ return NULL;
}
/*
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 5fed60d..71998d4 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1916,7 +1916,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
if (res->sr_bg_blkno) {
/* Attempt to short-circuit the usual search mechanism
* by jumping straight to the most recently used
- * allocation group. This helps us mantain some
+ * allocation group. This helps us maintain some
* contiguousness across allocations. */
status = ocfs2_search_one_group(ac, handle, bits_wanted,
min_bits, res, &bits_left);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 56f0cb3..17ff46f 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -41,7 +41,6 @@
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/quotaops.h>
-#include <linux/smp_lock.h>
#define MLOG_MASK_PREFIX ML_SUPER
#include <cluster/masklog.h>
@@ -570,11 +569,18 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
return &oi->vfs_inode;
}
-static void ocfs2_destroy_inode(struct inode *inode)
+static void ocfs2_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode));
}
+static void ocfs2_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, ocfs2_i_callback);
+}
+
static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
unsigned int cbits)
{
@@ -1236,14 +1242,12 @@ read_super_error:
return status;
}
-static int ocfs2_get_sb(struct file_system_type *fs_type,
+static struct dentry *ocfs2_mount(struct file_system_type *fs_type,
int flags,
const char *dev_name,
- void *data,
- struct vfsmount *mnt)
+ void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super);
}
static void ocfs2_kill_sb(struct super_block *sb)
@@ -1267,8 +1271,7 @@ out:
static struct file_system_type ocfs2_fs_type = {
.owner = THIS_MODULE,
.name = "ocfs2",
- .get_sb = ocfs2_get_sb, /* is this called when we mount
- * the fs? */
+ .mount = ocfs2_mount,
.kill_sb = ocfs2_kill_sb,
.fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 14a2286..e043c4c 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -557,17 +557,16 @@ end:
return ret;
}
-static int omfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name,
- void *data, struct vfsmount *m)
+static struct dentry *omfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, omfs_fill_super, m);
+ return mount_bdev(fs_type, flags, dev_name, data, omfs_fill_super);
}
static struct file_system_type omfs_fs_type = {
.owner = THIS_MODULE,
.name = "omfs",
- .get_sb = omfs_get_sb,
+ .mount = omfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/open.c b/fs/open.c
index d74e198..4197b9e 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -786,11 +786,11 @@ struct file *nameidata_to_filp(struct nameidata *nd)
/* Pick up the filp from the open intent */
filp = nd->intent.open.file;
/* Has the filesystem initialised the file for us? */
- if (filp->f_path.dentry == NULL)
+ if (filp->f_path.dentry == NULL) {
+ path_get(&nd->path);
filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp,
NULL, cred);
- else
- path_put(&nd->path);
+ }
return filp;
}
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index ffcd04f..a2a5bff 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -343,11 +343,18 @@ static struct inode *openprom_alloc_inode(struct super_block *sb)
return &oi->vfs_inode;
}
-static void openprom_destroy_inode(struct inode *inode)
+static void openprom_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(op_inode_cachep, OP_I(inode));
}
+static void openprom_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, openprom_i_callback);
+}
+
static struct inode *openprom_iget(struct super_block *sb, ino_t ino)
{
struct inode *inode;
@@ -415,16 +422,16 @@ out_no_root:
return ret;
}
-static int openprom_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *openprom_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_single(fs_type, flags, data, openprom_fill_super, mnt);
+ return mount_single(fs_type, flags, data, openprom_fill_super);
}
static struct file_system_type openprom_fs_type = {
.owner = THIS_MODULE,
.name = "openpromfs",
- .get_sb = openprom_get_sb,
+ .mount = openprom_mount,
.kill_sb = kill_anon_super,
};
diff --git a/fs/pipe.c b/fs/pipe.c
index d2d7566..04151e2 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -441,7 +441,7 @@ redo:
break;
}
if (do_wakeup) {
- wake_up_interruptible_sync(&pipe->wait);
+ wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
pipe_wait(pipe);
@@ -450,7 +450,7 @@ redo:
/* Signal writers asynchronously that there is more room. */
if (do_wakeup) {
- wake_up_interruptible_sync(&pipe->wait);
+ wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
if (ret > 0)
@@ -612,7 +612,7 @@ redo2:
break;
}
if (do_wakeup) {
- wake_up_interruptible_sync(&pipe->wait);
+ wake_up_interruptible_sync_poll(&pipe->wait, POLLIN);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
do_wakeup = 0;
}
@@ -623,7 +623,7 @@ redo2:
out:
mutex_unlock(&inode->i_mutex);
if (do_wakeup) {
- wake_up_interruptible_sync(&pipe->wait);
+ wake_up_interruptible_sync_poll(&pipe->wait, POLLIN);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
}
if (ret > 0)
@@ -715,7 +715,7 @@ pipe_release(struct inode *inode, int decr, int decw)
if (!pipe->readers && !pipe->writers) {
free_pipe_info(inode);
} else {
- wake_up_interruptible_sync(&pipe->wait);
+ wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
@@ -999,12 +999,12 @@ struct file *create_write_pipe(int flags)
goto err;
err = -ENOMEM;
- path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
+ path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
if (!path.dentry)
goto err_inode;
path.mnt = mntget(pipe_mnt);
- path.dentry->d_op = &pipefs_dentry_operations;
+ d_set_d_op(path.dentry, &pipefs_dentry_operations);
d_instantiate(path.dentry, inode);
err = -ENFILE;
@@ -1199,12 +1199,24 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
return ret;
}
+/*
+ * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
+ * location, so checking ->i_pipe is not enough to verify that this is a
+ * pipe.
+ */
+struct pipe_inode_info *get_pipe_info(struct file *file)
+{
+ struct inode *i = file->f_path.dentry->d_inode;
+
+ return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL;
+}
+
long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct pipe_inode_info *pipe;
long ret;
- pipe = file->f_path.dentry->d_inode->i_pipe;
+ pipe = get_pipe_info(file);
if (!pipe)
return -EBADF;
@@ -1241,22 +1253,25 @@ out:
return ret;
}
+static const struct super_operations pipefs_ops = {
+ .destroy_inode = free_inode_nonrcu,
+};
+
/*
* pipefs should _never_ be mounted by userland - too much of security hassle,
* no real gain from having the whole whorehouse mounted. So we don't need
* any operations on the root directory. However, we need a non-trivial
* d_name - pipe: will go nicely and kill the special-casing in procfs.
*/
-static int pipefs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data,
- struct vfsmount *mnt)
+static struct dentry *pipefs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt);
+ return mount_pseudo(fs_type, "pipe:", &pipefs_ops, PIPEFS_MAGIC);
}
static struct file_system_type pipe_fs_type = {
.name = "pipefs",
- .get_sb = pipefs_get_sb,
+ .mount = pipefs_mount,
.kill_sb = kill_anon_super,
};
@@ -1277,7 +1292,7 @@ static int __init init_pipe_fs(void)
static void __exit exit_pipe_fs(void)
{
unregister_filesystem(&pipe_fs_type);
- mntput(pipe_mnt);
+ mntput_long(pipe_mnt);
}
fs_initcall(init_pipe_fs);
diff --git a/fs/pnode.c b/fs/pnode.c
index 8066b8d..d42514e 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -288,7 +288,7 @@ out:
*/
static inline int do_refcount_check(struct vfsmount *mnt, int count)
{
- int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts;
+ int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts;
return (mycount > count);
}
@@ -300,7 +300,7 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count)
* Check if any of these mounts that **do not have submounts**
* have more references than 'refcnt'. If so return busy.
*
- * vfsmount lock must be held for read or write
+ * vfsmount lock must be held for write
*/
int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
{
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 2758e2a..df434c5 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -10,6 +10,7 @@ proc-$(CONFIG_MMU) := mmu.o task_mmu.o
proc-y += inode.o root.o base.o generic.o array.o \
proc_tty.o
proc-y += cmdline.o
+proc-y += consoles.o
proc-y += cpuinfo.o
proc-y += devices.o
proc-y += interrupts.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index fff6572..df2b703 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -95,7 +95,7 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
get_task_comm(tcomm, p);
- seq_printf(m, "Name:\t");
+ seq_puts(m, "Name:\t");
end = m->buf + m->size;
buf = m->buf + m->count;
name = tcomm;
@@ -122,7 +122,7 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
buf++;
}
m->count = buf - m->buf;
- seq_printf(m, "\n");
+ seq_putc(m, '\n');
}
/*
@@ -208,7 +208,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
seq_printf(m, "%d ", GROUP_AT(group_info, g));
put_cred(cred);
- seq_printf(m, "\n");
+ seq_putc(m, '\n');
}
static void render_sigset_t(struct seq_file *m, const char *header,
@@ -216,7 +216,7 @@ static void render_sigset_t(struct seq_file *m, const char *header,
{
int i;
- seq_printf(m, "%s", header);
+ seq_puts(m, header);
i = _NSIG;
do {
@@ -230,7 +230,7 @@ static void render_sigset_t(struct seq_file *m, const char *header,
seq_printf(m, "%x", x);
} while (i >= 4);
- seq_printf(m, "\n");
+ seq_putc(m, '\n');
}
static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
@@ -291,12 +291,12 @@ static void render_cap_t(struct seq_file *m, const char *header,
{
unsigned __capi;
- seq_printf(m, "%s", header);
+ seq_puts(m, header);
CAP_FOR_EACH_U32(__capi) {
seq_printf(m, "%08x",
a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
}
- seq_printf(m, "\n");
+ seq_putc(m, '\n');
}
static inline void task_cap(struct seq_file *m, struct task_struct *p)
@@ -329,12 +329,12 @@ static inline void task_context_switch_counts(struct seq_file *m,
static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
{
- seq_printf(m, "Cpus_allowed:\t");
+ seq_puts(m, "Cpus_allowed:\t");
seq_cpumask(m, &task->cpus_allowed);
- seq_printf(m, "\n");
- seq_printf(m, "Cpus_allowed_list:\t");
+ seq_putc(m, '\n');
+ seq_puts(m, "Cpus_allowed_list:\t");
seq_cpumask_list(m, &task->cpus_allowed);
- seq_printf(m, "\n");
+ seq_putc(m, '\n');
}
int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
@@ -535,15 +535,15 @@ int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
- int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0;
+ unsigned long size = 0, resident = 0, shared = 0, text = 0, data = 0;
struct mm_struct *mm = get_task_mm(task);
if (mm) {
size = task_statm(mm, &shared, &text, &data, &resident);
mmput(mm);
}
- seq_printf(m, "%d %d %d %d %d %d %d\n",
- size, resident, shared, text, lib, data, 0);
+ seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n",
+ size, resident, shared, text, data);
return 0;
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index f3d02ca..93f1cdd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -373,26 +373,20 @@ static int lstats_show_proc(struct seq_file *m, void *v)
return -ESRCH;
seq_puts(m, "Latency Top version : v0.1\n");
for (i = 0; i < 32; i++) {
- if (task->latency_record[i].backtrace[0]) {
+ struct latency_record *lr = &task->latency_record[i];
+ if (lr->backtrace[0]) {
int q;
- seq_printf(m, "%i %li %li ",
- task->latency_record[i].count,
- task->latency_record[i].time,
- task->latency_record[i].max);
+ seq_printf(m, "%i %li %li",
+ lr->count, lr->time, lr->max);
for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
- char sym[KSYM_SYMBOL_LEN];
- char *c;
- if (!task->latency_record[i].backtrace[q])
+ unsigned long bt = lr->backtrace[q];
+ if (!bt)
break;
- if (task->latency_record[i].backtrace[q] == ULONG_MAX)
+ if (bt == ULONG_MAX)
break;
- sprint_symbol(sym, task->latency_record[i].backtrace[q]);
- c = strchr(sym, '+');
- if (c)
- *c = 0;
- seq_printf(m, "%s ", sym);
+ seq_printf(m, " %ps", (void *)bt);
}
- seq_printf(m, "\n");
+ seq_putc(m, '\n');
}
}
@@ -751,14 +745,7 @@ static int proc_single_show(struct seq_file *m, void *v)
static int proc_single_open(struct inode *inode, struct file *filp)
{
- int ret;
- ret = single_open(filp, proc_single_show, NULL);
- if (!ret) {
- struct seq_file *m = filp->private_data;
-
- m->private = inode;
- }
- return ret;
+ return single_open(filp, proc_single_show, inode);
}
static const struct file_operations proc_single_file_operations = {
@@ -1386,9 +1373,77 @@ sched_write(struct file *file, const char __user *buf,
static int sched_open(struct inode *inode, struct file *filp)
{
+ return single_open(filp, sched_show, inode);
+}
+
+static const struct file_operations proc_pid_sched_operations = {
+ .open = sched_open,
+ .read = seq_read,
+ .write = sched_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+#endif
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+/*
+ * Print out autogroup related information:
+ */
+static int sched_autogroup_show(struct seq_file *m, void *v)
+{
+ struct inode *inode = m->private;
+ struct task_struct *p;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+ proc_sched_autogroup_show_task(p, m);
+
+ put_task_struct(p);
+
+ return 0;
+}
+
+static ssize_t
+sched_autogroup_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offset)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct task_struct *p;
+ char buffer[PROC_NUMBUF];
+ long nice;
+ int err;
+
+ memset(buffer, 0, sizeof(buffer));
+ if (count > sizeof(buffer) - 1)
+ count = sizeof(buffer) - 1;
+ if (copy_from_user(buffer, buf, count))
+ return -EFAULT;
+
+ err = strict_strtol(strstrip(buffer), 0, &nice);
+ if (err)
+ return -EINVAL;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+ err = nice;
+ err = proc_sched_autogroup_set_nice(p, &err);
+ if (err)
+ count = err;
+
+ put_task_struct(p);
+
+ return count;
+}
+
+static int sched_autogroup_open(struct inode *inode, struct file *filp)
+{
int ret;
- ret = single_open(filp, sched_show, NULL);
+ ret = single_open(filp, sched_autogroup_show, NULL);
if (!ret) {
struct seq_file *m = filp->private_data;
@@ -1397,15 +1452,15 @@ static int sched_open(struct inode *inode, struct file *filp)
return ret;
}
-static const struct file_operations proc_pid_sched_operations = {
- .open = sched_open,
+static const struct file_operations proc_pid_sched_autogroup_operations = {
+ .open = sched_autogroup_open,
.read = seq_read,
- .write = sched_write,
+ .write = sched_autogroup_write,
.llseek = seq_lseek,
.release = single_release,
};
-#endif
+#endif /* CONFIG_SCHED_AUTOGROUP */
static ssize_t comm_write(struct file *file, const char __user *buf,
size_t count, loff_t *offset)
@@ -1454,15 +1509,7 @@ static int comm_show(struct seq_file *m, void *v)
static int comm_open(struct inode *inode, struct file *filp)
{
- int ret;
-
- ret = single_open(filp, comm_show, NULL);
- if (!ret) {
- struct seq_file *m = filp->private_data;
-
- m->private = inode;
- }
- return ret;
+ return single_open(filp, comm_show, inode);
}
static const struct file_operations proc_pid_set_comm_operations = {
@@ -1574,7 +1621,7 @@ static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
if (!tmp)
return -ENOMEM;
- pathname = d_path_with_unreachable(path, tmp, PAGE_SIZE);
+ pathname = d_path(path, tmp, PAGE_SIZE);
len = PTR_ERR(pathname);
if (IS_ERR(pathname))
goto out;
@@ -1719,10 +1766,16 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
*/
static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- struct inode *inode = dentry->d_inode;
- struct task_struct *task = get_proc_task(inode);
+ struct inode *inode;
+ struct task_struct *task;
const struct cred *cred;
+ if (nd && nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ inode = dentry->d_inode;
+ task = get_proc_task(inode);
+
if (task) {
if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
task_dumpable(task)) {
@@ -1744,7 +1797,7 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
return 0;
}
-static int pid_delete_dentry(struct dentry * dentry)
+static int pid_delete_dentry(const struct dentry * dentry)
{
/* Is the task we represent dead?
* If so, then don't put the dentry on the lru list,
@@ -1888,12 +1941,19 @@ static int proc_fd_link(struct inode *inode, struct path *path)
static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- struct inode *inode = dentry->d_inode;
- struct task_struct *task = get_proc_task(inode);
- int fd = proc_fd(inode);
+ struct inode *inode;
+ struct task_struct *task;
+ int fd;
struct files_struct *files;
const struct cred *cred;
+ if (nd && nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ inode = dentry->d_inode;
+ task = get_proc_task(inode);
+ fd = proc_fd(inode);
+
if (task) {
files = get_files_struct(task);
if (files) {
@@ -1969,7 +2029,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
inode->i_op = &proc_pid_link_inode_operations;
inode->i_size = 64;
ei->op.proc_get_link = proc_fd_link;
- dentry->d_op = &tid_fd_dentry_operations;
+ d_set_d_op(dentry, &tid_fd_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (tid_fd_revalidate(dentry, NULL))
@@ -2101,11 +2161,13 @@ static const struct file_operations proc_fd_operations = {
* /proc/pid/fd needs a special permission handler so that a process can still
* access /proc/self/fd after it has executed a setuid().
*/
-static int proc_fd_permission(struct inode *inode, int mask)
+static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags)
{
int rv;
- rv = generic_permission(inode, mask, NULL);
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+ rv = generic_permission(inode, mask, flags, NULL);
if (rv == 0)
return 0;
if (task_pid(current) == proc_pid(inode))
@@ -2137,7 +2199,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
ei->fd = fd;
inode->i_mode = S_IFREG | S_IRUSR;
inode->i_fop = &proc_fdinfo_file_operations;
- dentry->d_op = &tid_fd_dentry_operations;
+ d_set_d_op(dentry, &tid_fd_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (tid_fd_revalidate(dentry, NULL))
@@ -2196,7 +2258,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
if (p->fop)
inode->i_fop = p->fop;
ei->op = p->op;
- dentry->d_op = &pid_dentry_operations;
+ d_set_d_op(dentry, &pid_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (pid_revalidate(dentry, NULL))
@@ -2563,8 +2625,14 @@ static const struct pid_entry proc_base_stuff[] = {
*/
static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- struct inode *inode = dentry->d_inode;
- struct task_struct *task = get_proc_task(inode);
+ struct inode *inode;
+ struct task_struct *task;
+
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ inode = dentry->d_inode;
+ task = get_proc_task(inode);
if (task) {
put_task_struct(task);
return 1;
@@ -2615,7 +2683,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
if (p->fop)
inode->i_fop = p->fop;
ei->op = p->op;
- dentry->d_op = &proc_base_dentry_operations;
+ d_set_d_op(dentry, &proc_base_dentry_operations);
d_add(dentry, inode);
error = NULL;
out:
@@ -2733,6 +2801,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
+#ifdef CONFIG_SCHED_AUTOGROUP
+ REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
+#endif
REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
INF("syscall", S_IRUSR, proc_pid_syscall),
@@ -2926,7 +2997,7 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff,
ARRAY_SIZE(tgid_base_stuff));
- dentry->d_op = &pid_dentry_operations;
+ d_set_d_op(dentry, &pid_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
@@ -3169,7 +3240,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff,
ARRAY_SIZE(tid_base_stuff));
- dentry->d_op = &pid_dentry_operations;
+ d_set_d_op(dentry, &pid_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
new file mode 100644
index 0000000..eafc22a
--- /dev/null
+++ b/fs/proc/consoles.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2010 Werner Fink, Jiri Slaby
+ *
+ * Licensed under GPLv2
+ */
+
+#include <linux/console.h>
+#include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/tty_driver.h>
+
+/*
+ * This is handler for /proc/consoles
+ */
+static int show_console_dev(struct seq_file *m, void *v)
+{
+ static const struct {
+ short flag;
+ char name;
+ } con_flags[] = {
+ { CON_ENABLED, 'E' },
+ { CON_CONSDEV, 'C' },
+ { CON_BOOT, 'B' },
+ { CON_PRINTBUFFER, 'p' },
+ { CON_BRL, 'b' },
+ { CON_ANYTIME, 'a' },
+ };
+ char flags[ARRAY_SIZE(con_flags) + 1];
+ struct console *con = v;
+ unsigned int a;
+ int len;
+ dev_t dev = 0;
+
+ if (con->device) {
+ const struct tty_driver *driver;
+ int index;
+ driver = con->device(con, &index);
+ if (driver) {
+ dev = MKDEV(driver->major, driver->minor_start);
+ dev += index;
+ }
+ }
+
+ for (a = 0; a < ARRAY_SIZE(con_flags); a++)
+ flags[a] = (con->flags & con_flags[a].flag) ?
+ con_flags[a].name : ' ';
+ flags[a] = 0;
+
+ seq_printf(m, "%s%d%n", con->name, con->index, &len);
+ len = 21 - len;
+ if (len < 1)
+ len = 1;
+ seq_printf(m, "%*c%c%c%c (%s)", len, ' ', con->read ? 'R' : '-',
+ con->write ? 'W' : '-', con->unblank ? 'U' : '-',
+ flags);
+ if (dev)
+ seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev));
+
+ seq_printf(m, "\n");
+
+ return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+ struct console *con;
+ loff_t off = 0;
+
+ acquire_console_sem();
+ for_each_console(con)
+ if (off++ == *pos)
+ break;
+
+ return con;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct console *con = v;
+ ++*pos;
+ return con->next;
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+ release_console_sem();
+}
+
+static const struct seq_operations consoles_op = {
+ .start = c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = show_console_dev
+};
+
+static int consoles_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &consoles_op);
+}
+
+static const struct file_operations proc_consoles_operations = {
+ .open = consoles_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static int __init proc_consoles_init(void)
+{
+ proc_create("consoles", 0, NULL, &proc_consoles_operations);
+ return 0;
+}
+module_init(proc_consoles_init);
diff --git a/fs/proc/devices.c b/fs/proc/devices.c
index 59ee7da..b143471 100644
--- a/fs/proc/devices.c
+++ b/fs/proc/devices.c
@@ -9,14 +9,14 @@ static int devinfo_show(struct seq_file *f, void *v)
if (i < CHRDEV_MAJOR_HASH_SIZE) {
if (i == 0)
- seq_printf(f, "Character devices:\n");
+ seq_puts(f, "Character devices:\n");
chrdev_show(f, i);
}
#ifdef CONFIG_BLOCK
else {
i -= CHRDEV_MAJOR_HASH_SIZE;
if (i == 0)
- seq_printf(f, "\nBlock devices:\n");
+ seq_puts(f, "\nBlock devices:\n");
blkdev_show(f, i);
}
#endif
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index dd29f03..01e07f2 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -400,7 +400,7 @@ static const struct inode_operations proc_link_inode_operations = {
* smarter: we could keep a "volatile" flag in the
* inode to indicate which ones to keep.
*/
-static int proc_delete_dentry(struct dentry * dentry)
+static int proc_delete_dentry(const struct dentry * dentry)
{
return 1;
}
@@ -425,13 +425,10 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
if (de->namelen != dentry->d_name.len)
continue;
if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
- unsigned int ino;
-
- ino = de->low_ino;
pde_get(de);
spin_unlock(&proc_subdir_lock);
error = -EINVAL;
- inode = proc_get_inode(dir->i_sb, ino, de);
+ inode = proc_get_inode(dir->i_sb, de);
goto out_unlock;
}
}
@@ -439,7 +436,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
out_unlock:
if (inode) {
- dentry->d_op = &proc_dentry_operations;
+ d_set_d_op(dentry, &proc_dentry_operations);
d_add(dentry, inode);
return NULL;
}
@@ -768,12 +765,7 @@ EXPORT_SYMBOL(proc_create_data);
static void free_proc_entry(struct proc_dir_entry *de)
{
- unsigned int ino = de->low_ino;
-
- if (ino < PROC_DYNAMIC_FIRST)
- return;
-
- release_inode_number(ino);
+ release_inode_number(de->low_ino);
if (S_ISLNK(de->mode))
kfree(de->data);
@@ -834,12 +826,9 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
wait_for_completion(de->pde_unload_completion);
- goto continue_removing;
+ spin_lock(&de->pde_unload_lock);
}
- spin_unlock(&de->pde_unload_lock);
-continue_removing:
- spin_lock(&de->pde_unload_lock);
while (!list_empty(&de->pde_openers)) {
struct pde_opener *pdeo;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 9c2b5f4..176ce4c 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -16,7 +16,6 @@
#include <linux/limits.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/smp_lock.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
@@ -66,11 +65,18 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
return inode;
}
-static void proc_destroy_inode(struct inode *inode)
+static void proc_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(proc_inode_cachep, PROC_I(inode));
}
+static void proc_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, proc_i_callback);
+}
+
static void init_once(void *foo)
{
struct proc_inode *ei = (struct proc_inode *) foo;
@@ -410,12 +416,11 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
};
#endif
-struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
- struct proc_dir_entry *de)
+struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
{
struct inode * inode;
- inode = iget_locked(sb, ino);
+ inode = iget_locked(sb, de->low_ino);
if (!inode)
return NULL;
if (inode->i_state & I_NEW) {
@@ -465,7 +470,7 @@ int proc_fill_super(struct super_block *s)
s->s_time_gran = 1;
pde_get(&proc_root);
- root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root);
+ root_inode = proc_get_inode(s, &proc_root);
if (!root_inode)
goto out_no_root;
root_inode->i_uid = 0;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 1f24a3e..9ad561d 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -96,7 +96,8 @@ extern spinlock_t proc_subdir_lock;
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
unsigned long task_vsize(struct mm_struct *);
-int task_statm(struct mm_struct *, int *, int *, int *, int *);
+unsigned long task_statm(struct mm_struct *,
+ unsigned long *, unsigned long *, unsigned long *, unsigned long *);
void task_mem(struct seq_file *, struct mm_struct *);
static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
@@ -108,7 +109,7 @@ void pde_put(struct proc_dir_entry *pde);
extern struct vfsmount *proc_mnt;
int proc_fill_super(struct super_block *);
-struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *);
+struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
/*
* These are generic /proc routines that use the internal
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 6f37c39..d245cb2 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -558,7 +558,7 @@ static int open_kcore(struct inode *inode, struct file *filp)
static const struct file_operations proc_kcore_operations = {
.read = read_kcore,
.open = open_kcore,
- .llseek = generic_file_llseek,
+ .llseek = default_llseek,
};
#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 3b8b4566..b06c674 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -40,7 +40,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
ppage = pfn_to_page(pfn);
else
ppage = NULL;
- if (!ppage)
+ if (!ppage || PageSlab(ppage))
pcount = 0;
else
pcount = page_mapcount(ppage);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index b652cb0..09a1f92 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -5,6 +5,7 @@
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/security.h>
+#include <linux/namei.h>
#include "internal.h"
static const struct dentry_operations proc_sys_dentry_operations;
@@ -120,7 +121,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
goto out;
err = NULL;
- dentry->d_op = &proc_sys_dentry_operations;
+ d_set_d_op(dentry, &proc_sys_dentry_operations);
d_add(dentry, inode);
out:
@@ -201,7 +202,7 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent,
dput(child);
return -ENOMEM;
} else {
- child->d_op = &proc_sys_dentry_operations;
+ d_set_d_op(child, &proc_sys_dentry_operations);
d_add(child, inode);
}
} else {
@@ -294,7 +295,7 @@ out:
return ret;
}
-static int proc_sys_permission(struct inode *inode, int mask)
+static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags)
{
/*
* sysctl entries that are not writeable,
@@ -304,6 +305,9 @@ static int proc_sys_permission(struct inode *inode, int mask)
struct ctl_table *table;
int error;
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
/* Executable files are not allowed under /proc/sys/ */
if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
return -EACCES;
@@ -389,23 +393,30 @@ static const struct inode_operations proc_sys_dir_operations = {
static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
{
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
return !PROC_I(dentry->d_inode)->sysctl->unregistering;
}
-static int proc_sys_delete(struct dentry *dentry)
+static int proc_sys_delete(const struct dentry *dentry)
{
return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
}
-static int proc_sys_compare(struct dentry *dir, struct qstr *qstr,
- struct qstr *name)
+static int proc_sys_compare(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- struct dentry *dentry = container_of(qstr, struct dentry, d_name);
- if (qstr->len != name->len)
+ /* Although proc doesn't have negative dentries, rcu-walk means
+ * that inode here can be NULL */
+ if (!inode)
+ return 0;
+ if (name->len != len)
return 1;
- if (memcmp(qstr->name, name->name, name->len))
+ if (memcmp(name->name, str, len))
return 1;
- return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl);
+ return !sysctl_is_seen(PROC_I(inode)->sysctl);
}
static const struct dentry_operations proc_sys_dentry_operations = {
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 83adcc8..cb761f0 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -36,27 +36,27 @@ static void show_tty_range(struct seq_file *m, struct tty_driver *p,
}
switch (p->type) {
case TTY_DRIVER_TYPE_SYSTEM:
- seq_printf(m, "system");
+ seq_puts(m, "system");
if (p->subtype == SYSTEM_TYPE_TTY)
- seq_printf(m, ":/dev/tty");
+ seq_puts(m, ":/dev/tty");
else if (p->subtype == SYSTEM_TYPE_SYSCONS)
- seq_printf(m, ":console");
+ seq_puts(m, ":console");
else if (p->subtype == SYSTEM_TYPE_CONSOLE)
- seq_printf(m, ":vtmaster");
+ seq_puts(m, ":vtmaster");
break;
case TTY_DRIVER_TYPE_CONSOLE:
- seq_printf(m, "console");
+ seq_puts(m, "console");
break;
case TTY_DRIVER_TYPE_SERIAL:
- seq_printf(m, "serial");
+ seq_puts(m, "serial");
break;
case TTY_DRIVER_TYPE_PTY:
if (p->subtype == PTY_TYPE_MASTER)
- seq_printf(m, "pty:master");
+ seq_puts(m, "pty:master");
else if (p->subtype == PTY_TYPE_SLAVE)
- seq_printf(m, "pty:slave");
+ seq_puts(m, "pty:slave");
else
- seq_printf(m, "pty");
+ seq_puts(m, "pty");
break;
default:
seq_printf(m, "type:%d.%d", p->type, p->subtype);
@@ -74,19 +74,19 @@ static int show_tty_driver(struct seq_file *m, void *v)
/* pseudo-drivers first */
seq_printf(m, "%-20s /dev/%-8s ", "/dev/tty", "tty");
seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 0);
- seq_printf(m, "system:/dev/tty\n");
+ seq_puts(m, "system:/dev/tty\n");
seq_printf(m, "%-20s /dev/%-8s ", "/dev/console", "console");
seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 1);
- seq_printf(m, "system:console\n");
+ seq_puts(m, "system:console\n");
#ifdef CONFIG_UNIX98_PTYS
seq_printf(m, "%-20s /dev/%-8s ", "/dev/ptmx", "ptmx");
seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 2);
- seq_printf(m, "system\n");
+ seq_puts(m, "system\n");
#endif
#ifdef CONFIG_VT
seq_printf(m, "%-20s /dev/%-8s ", "/dev/vc/0", "vc/0");
seq_printf(m, "%3d %7d ", TTY_MAJOR, 0);
- seq_printf(m, "system:vtmaster\n");
+ seq_puts(m, "system:vtmaster\n");
#endif
}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 93d99b3..ef9fa8e 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -35,8 +35,8 @@ static int proc_set_super(struct super_block *sb, void *data)
return set_anon_super(sb, NULL);
}
-static int proc_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *proc_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
int err;
struct super_block *sb;
@@ -61,14 +61,14 @@ static int proc_get_sb(struct file_system_type *fs_type,
sb = sget(fs_type, proc_test_super, proc_set_super, ns);
if (IS_ERR(sb))
- return PTR_ERR(sb);
+ return ERR_CAST(sb);
if (!sb->s_root) {
sb->s_flags = flags;
err = proc_fill_super(sb);
if (err) {
deactivate_locked_super(sb);
- return err;
+ return ERR_PTR(err);
}
ei = PROC_I(sb->s_root->d_inode);
@@ -79,11 +79,9 @@ static int proc_get_sb(struct file_system_type *fs_type,
}
sb->s_flags |= MS_ACTIVE;
- ns->proc_mnt = mnt;
}
- simple_set_mnt(mnt, sb);
- return 0;
+ return dget(sb->s_root);
}
static void proc_kill_sb(struct super_block *sb)
@@ -97,7 +95,7 @@ static void proc_kill_sb(struct super_block *sb)
static struct file_system_type proc_fs_type = {
.name = "proc",
- .get_sb = proc_get_sb,
+ .mount = proc_mount,
.kill_sb = proc_kill_sb,
};
@@ -115,6 +113,7 @@ void __init proc_root_init(void)
return;
}
+ init_pid_ns.proc_mnt = proc_mnt;
proc_symlink("mounts", NULL, "self/mounts");
proc_net_init();
@@ -213,6 +212,7 @@ int pid_ns_prepare_proc(struct pid_namespace *ns)
if (IS_ERR(mnt))
return PTR_ERR(mnt);
+ ns->proc_mnt = mnt;
return 0;
}
diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c
index 3799473..62604be 100644
--- a/fs/proc/softirqs.c
+++ b/fs/proc/softirqs.c
@@ -10,16 +10,16 @@ static int show_softirqs(struct seq_file *p, void *v)
{
int i, j;
- seq_printf(p, " ");
+ seq_puts(p, " ");
for_each_possible_cpu(i)
seq_printf(p, "CPU%-8d", i);
- seq_printf(p, "\n");
+ seq_putc(p, '\n');
for (i = 0; i < NR_SOFTIRQS; i++) {
seq_printf(p, "%12s:", softirq_to_name[i]);
for_each_possible_cpu(j)
seq_printf(p, " %10u", kstat_softirqs_cpu(i, j));
- seq_printf(p, "\n");
+ seq_putc(p, '\n');
}
return 0;
}
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index e15a19c..1cffa2b 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -126,7 +126,7 @@ static int show_stat(struct seq_file *p, void *v)
for (i = 0; i < NR_SOFTIRQS; i++)
seq_printf(p, " %u", per_softirq_sums[i]);
- seq_printf(p, "\n");
+ seq_putc(p, '\n');
return 0;
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index da6b01d..c3755bd 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -66,8 +66,9 @@ unsigned long task_vsize(struct mm_struct *mm)
return PAGE_SIZE * mm->total_vm;
}
-int task_statm(struct mm_struct *mm, int *shared, int *text,
- int *data, int *resident)
+unsigned long task_statm(struct mm_struct *mm,
+ unsigned long *shared, unsigned long *text,
+ unsigned long *data, unsigned long *resident)
{
*shared = get_mm_counter(mm, MM_FILEPAGES);
*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
@@ -706,6 +707,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
* skip over unmapped regions.
*/
#define PAGEMAP_WALK_SIZE (PMD_SIZE)
+#define PAGEMAP_WALK_MASK (PMD_MASK)
static ssize_t pagemap_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -776,7 +778,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
unsigned long end;
pm.pos = 0;
- end = start_vaddr + PAGEMAP_WALK_SIZE;
+ end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
/* overflow ? */
if (end < start_vaddr || end > end_vaddr)
end = end_vaddr;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index cb6306e..b535d3e 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -92,13 +92,14 @@ unsigned long task_vsize(struct mm_struct *mm)
return vsize;
}
-int task_statm(struct mm_struct *mm, int *shared, int *text,
- int *data, int *resident)
+unsigned long task_statm(struct mm_struct *mm,
+ unsigned long *shared, unsigned long *text,
+ unsigned long *data, unsigned long *resident)
{
struct vm_area_struct *vma;
struct vm_region *region;
struct rb_node *p;
- int size = kobjsize(mm);
+ unsigned long size = kobjsize(mm);
down_read(&mm->mmap_sem);
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 2367fb3..74802bc5 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -499,7 +499,7 @@ static int __init parse_crash_elf64_headers(void)
/* Do some basic Verification. */
if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
(ehdr.e_type != ET_CORE) ||
- !vmcore_elf_check_arch(&ehdr) ||
+ !vmcore_elf64_check_arch(&ehdr) ||
ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
ehdr.e_version != EV_CURRENT ||
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 01bad30..e63b417 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -425,11 +425,18 @@ static struct inode *qnx4_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void qnx4_destroy_inode(struct inode *inode)
+static void qnx4_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode));
}
+static void qnx4_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, qnx4_i_callback);
+}
+
static void init_once(void *foo)
{
struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo;
@@ -454,17 +461,16 @@ static void destroy_inodecache(void)
kmem_cache_destroy(qnx4_inode_cachep);
}
-static int qnx4_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *qnx4_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, qnx4_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, qnx4_fill_super);
}
static struct file_system_type qnx4_fs_type = {
.owner = THIS_MODULE,
.name = "qnx4",
- .get_sb = qnx4_get_sb,
+ .mount = qnx4_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 0fed41e..84becd3 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -133,16 +133,20 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
EXPORT_SYMBOL(dq_data_lock);
void __quota_error(struct super_block *sb, const char *func,
- const char *fmt, ...)
+ const char *fmt, ...)
{
- va_list args;
-
if (printk_ratelimit()) {
+ va_list args;
+ struct va_format vaf;
+
va_start(args, fmt);
- printk(KERN_ERR "Quota error (device %s): %s: ",
- sb->s_id, func);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_ERR "Quota error (device %s): %s: %pV\n",
+ sb->s_id, func, &vaf);
+
va_end(args);
}
}
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index 9e48874..e41c1becf 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -468,8 +468,8 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
return -ENOMEM;
ret = read_blk(info, *blk, buf);
if (ret < 0) {
- quota_error(dquot->dq_sb, "Can't read quota data "
- "block %u", blk);
+ quota_error(dquot->dq_sb, "Can't read quota data block %u",
+ *blk);
goto out_buf;
}
newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
@@ -493,8 +493,9 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
} else {
ret = write_blk(info, *blk, buf);
if (ret < 0)
- quota_error(dquot->dq_sb, "Can't write quota "
- "tree block %u", blk);
+ quota_error(dquot->dq_sb,
+ "Can't write quota tree block %u",
+ *blk);
}
}
out_buf:
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 67fadb1..eacb166 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -255,17 +255,16 @@ fail:
return err;
}
-int ramfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+struct dentry *ramfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_nodev(fs_type, flags, data, ramfs_fill_super, mnt);
+ return mount_nodev(fs_type, flags, data, ramfs_fill_super);
}
-static int rootfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *rootfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super,
- mnt);
+ return mount_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super);
}
static void ramfs_kill_sb(struct super_block *sb)
@@ -276,12 +275,12 @@ static void ramfs_kill_sb(struct super_block *sb)
static struct file_system_type ramfs_fs_type = {
.name = "ramfs",
- .get_sb = ramfs_get_sb,
+ .mount = ramfs_mount,
.kill_sb = ramfs_kill_sb,
};
static struct file_system_type rootfs_fs_type = {
.name = "rootfs",
- .get_sb = rootfs_get_sb,
+ .mount = rootfs_mount,
.kill_sb = kill_litter_super,
};
diff --git a/fs/read_write.c b/fs/read_write.c
index 9cd9d14..5d431ba 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -9,7 +9,6 @@
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/uio.h>
-#include <linux/smp_lock.h>
#include <linux/fsnotify.h>
#include <linux/security.h>
#include <linux/module.h>
@@ -243,8 +242,6 @@ bad:
* them to something that fits in "int" so that others
* won't have to do range checks all the time.
*/
-#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
-
int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
{
struct inode *inode;
@@ -584,65 +581,71 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
unsigned long nr_segs, unsigned long fast_segs,
struct iovec *fast_pointer,
struct iovec **ret_pointer)
- {
+{
unsigned long seg;
- ssize_t ret;
+ ssize_t ret;
struct iovec *iov = fast_pointer;
- /*
- * SuS says "The readv() function *may* fail if the iovcnt argument
- * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
- * traditionally returned zero for zero segments, so...
- */
+ /*
+ * SuS says "The readv() function *may* fail if the iovcnt argument
+ * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
+ * traditionally returned zero for zero segments, so...
+ */
if (nr_segs == 0) {
ret = 0;
- goto out;
+ goto out;
}
- /*
- * First get the "struct iovec" from user memory and
- * verify all the pointers
- */
+ /*
+ * First get the "struct iovec" from user memory and
+ * verify all the pointers
+ */
if (nr_segs > UIO_MAXIOV) {
ret = -EINVAL;
- goto out;
+ goto out;
}
if (nr_segs > fast_segs) {
- iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
+ iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
if (iov == NULL) {
ret = -ENOMEM;
- goto out;
+ goto out;
}
- }
+ }
if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
ret = -EFAULT;
- goto out;
+ goto out;
}
- /*
+ /*
* According to the Single Unix Specification we should return EINVAL
* if an element length is < 0 when cast to ssize_t or if the
* total length would overflow the ssize_t return value of the
* system call.
- */
+ *
+ * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
+ * overflow case.
+ */
ret = 0;
- for (seg = 0; seg < nr_segs; seg++) {
- void __user *buf = iov[seg].iov_base;
- ssize_t len = (ssize_t)iov[seg].iov_len;
+ for (seg = 0; seg < nr_segs; seg++) {
+ void __user *buf = iov[seg].iov_base;
+ ssize_t len = (ssize_t)iov[seg].iov_len;
/* see if we we're about to use an invalid len or if
* it's about to overflow ssize_t */
- if (len < 0 || (ret + len < ret)) {
+ if (len < 0) {
ret = -EINVAL;
- goto out;
+ goto out;
}
if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
ret = -EFAULT;
- goto out;
+ goto out;
+ }
+ if (len > MAX_RW_COUNT - ret) {
+ len = MAX_RW_COUNT - ret;
+ iov[seg].iov_len = len;
}
-
ret += len;
- }
+ }
out:
*ret_pointer = iov;
return ret;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 41656d4..0bae036 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -8,7 +8,6 @@
#include <linux/reiserfs_acl.h>
#include <linux/reiserfs_xattr.h>
#include <linux/exportfs.h>
-#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index adf22b4..79265fd 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -9,7 +9,6 @@
#include <linux/time.h>
#include <asm/uaccess.h>
#include <linux/pagemap.h>
-#include <linux/smp_lock.h>
#include <linux/compat.h>
/*
@@ -184,12 +183,11 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
return 0;
}
- /* we need to make sure nobody is changing the file size beneath
- ** us
- */
- reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb);
depth = reiserfs_write_lock_once(inode->i_sb);
+ /* we need to make sure nobody is changing the file size beneath us */
+ reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb);
+
write_from = inode->i_size & (blocksize - 1);
/* if we are on a block boundary, we are already unpacked. */
if (write_from == 0) {
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 076c8b1..d31bce1 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -43,7 +43,6 @@
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/string.h>
-#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/workqueue.h>
#include <linux/writeback.h>
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index adbc6f5..45de98b 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -586,13 +586,13 @@ void print_block(struct buffer_head *bh, ...) //int print_mode, int first, int l
va_list args;
int mode, first, last;
- va_start(args, bh);
-
if (!bh) {
printk("print_block: buffer is NULL\n");
return;
}
+ va_start(args, bh);
+
mode = va_arg(args, int);
first = va_arg(args, int);
last = va_arg(args, int);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index e15ff612..2575682 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -28,7 +28,6 @@
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/crc32.h>
-#include <linux/smp_lock.h>
struct file_system_type reiserfs_fs_type;
@@ -530,11 +529,18 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void reiserfs_destroy_inode(struct inode *inode)
+static void reiserfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
}
+static void reiserfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, reiserfs_i_callback);
+}
+
static void init_once(void *foo)
{
struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
@@ -2213,12 +2219,11 @@ out:
#endif
-static int get_super_block(struct file_system_type *fs_type,
+static struct dentry *get_super_block(struct file_system_type *fs_type,
int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super);
}
static int __init init_reiserfs_fs(void)
@@ -2253,7 +2258,7 @@ static void __exit exit_reiserfs_fs(void)
struct file_system_type reiserfs_fs_type = {
.owner = THIS_MODULE,
.name = "reiserfs",
- .get_sb = get_super_block,
+ .mount = get_super_block,
.kill_sb = reiserfs_kill_sb,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 5d04a78..3cfb2e9 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -870,11 +870,14 @@ out:
return err;
}
-static int reiserfs_check_acl(struct inode *inode, int mask)
+static int reiserfs_check_acl(struct inode *inode, int mask, unsigned int flags)
{
struct posix_acl *acl;
int error = -EAGAIN; /* do regular unix permission checks by default */
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
if (acl) {
@@ -951,8 +954,10 @@ static int xattr_mount_check(struct super_block *s)
return 0;
}
-int reiserfs_permission(struct inode *inode, int mask)
+int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
{
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
/*
* We don't do permission checks on the internal objects.
* Permissions are determined by the "owning" object.
@@ -965,13 +970,16 @@ int reiserfs_permission(struct inode *inode, int mask)
* Stat data v1 doesn't support ACLs.
*/
if (get_inode_sd_version(inode) != STAT_DATA_V1)
- return generic_permission(inode, mask, reiserfs_check_acl);
+ return generic_permission(inode, mask, flags,
+ reiserfs_check_acl);
#endif
- return generic_permission(inode, mask, NULL);
+ return generic_permission(inode, mask, flags, NULL);
}
static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
{
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
return -EPERM;
}
@@ -990,7 +998,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
strlen(PRIVROOT_NAME));
if (!IS_ERR(dentry)) {
REISERFS_SB(s)->priv_root = dentry;
- dentry->d_op = &xattr_lookup_poison_ops;
+ d_set_d_op(dentry, &xattr_lookup_poison_ops);
if (dentry->d_inode)
dentry->d_inode->i_flags |= S_PRIVATE;
} else
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 536d697..90d2fcb 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -472,7 +472,9 @@ int reiserfs_acl_chmod(struct inode *inode)
struct reiserfs_transaction_handle th;
size_t size = reiserfs_xattr_nblocks(inode,
reiserfs_acl_size(clone->a_count));
- reiserfs_write_lock(inode->i_sb);
+ int depth;
+
+ depth = reiserfs_write_lock_once(inode->i_sb);
error = journal_begin(&th, inode->i_sb, size * 2);
if (!error) {
int error2;
@@ -482,7 +484,7 @@ int reiserfs_acl_chmod(struct inode *inode)
if (error2)
error = error2;
}
- reiserfs_write_unlock(inode->i_sb);
+ reiserfs_write_unlock_once(inode->i_sb, depth);
}
posix_acl_release(clone);
return error;
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 2685805..2305e31 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -400,11 +400,18 @@ static struct inode *romfs_alloc_inode(struct super_block *sb)
/*
* return a spent inode to the slab cache
*/
-static void romfs_destroy_inode(struct inode *inode)
+static void romfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
}
+static void romfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, romfs_i_callback);
+}
+
/*
* get filesystem statistics
*/
@@ -552,20 +559,19 @@ error_rsb:
/*
* get a superblock for mounting
*/
-static int romfs_get_sb(struct file_system_type *fs_type,
+static struct dentry *romfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
- int ret = -EINVAL;
+ struct dentry *ret = ERR_PTR(-EINVAL);
#ifdef CONFIG_ROMFS_ON_MTD
- ret = get_sb_mtd(fs_type, flags, dev_name, data, romfs_fill_super,
- mnt);
+ ret = mount_mtd(fs_type, flags, dev_name, data, romfs_fill_super);
#endif
#ifdef CONFIG_ROMFS_ON_BLOCK
- if (ret == -EINVAL)
- ret = get_sb_bdev(fs_type, flags, dev_name, data,
- romfs_fill_super, mnt);
+ if (ret == ERR_PTR(-EINVAL))
+ ret = mount_bdev(fs_type, flags, dev_name, data,
+ romfs_fill_super);
#endif
return ret;
}
@@ -592,7 +598,7 @@ static void romfs_kill_sb(struct super_block *sb)
static struct file_system_type romfs_fs_type = {
.owner = THIS_MODULE,
.name = "romfs",
- .get_sb = romfs_get_sb,
+ .mount = romfs_mount,
.kill_sb = romfs_kill_sb,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/select.c b/fs/select.c
index b7b10aa..e56560d 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -306,6 +306,8 @@ static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
rts.tv_sec = rts.tv_nsec = 0;
if (timeval) {
+ if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
+ memset(&rtv, 0, sizeof(rtv));
rtv.tv_sec = rts.tv_sec;
rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
diff --git a/fs/smbfs/Kconfig b/fs/smbfs/Kconfig
deleted file mode 100644
index 2bc24a8..0000000
--- a/fs/smbfs/Kconfig
+++ /dev/null
@@ -1,56 +0,0 @@
-config SMB_FS
- tristate "SMB file system support (OBSOLETE, please use CIFS)"
- depends on BKL # probably unfixable
- depends on INET
- select NLS
- help
- SMB (Server Message Block) is the protocol Windows for Workgroups
- (WfW), Windows 95/98, Windows NT and OS/2 Lan Manager use to share
- files and printers over local networks. Saying Y here allows you to
- mount their file systems (often called "shares" in this context) and
- access them just like any other Unix directory. Currently, this
- works only if the Windows machines use TCP/IP as the underlying
- transport protocol, and not NetBEUI. For details, read
- <file:Documentation/filesystems/smbfs.txt> and the SMB-HOWTO,
- available from <http://www.tldp.org/docs.html#howto>.
-
- Note: if you just want your box to act as an SMB *server* and make
- files and printing services available to Windows clients (which need
- to have a TCP/IP stack), you don't need to say Y here; you can use
- the program SAMBA (available from <ftp://ftp.samba.org/pub/samba/>)
- for that.
-
- General information about how to connect Linux, Windows machines and
- Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>.
-
- To compile the SMB support as a module, choose M here:
- the module will be called smbfs. Most people say N, however.
-
-config SMB_NLS_DEFAULT
- bool "Use a default NLS"
- depends on SMB_FS
- help
- Enabling this will make smbfs use nls translations by default. You
- need to specify the local charset (CONFIG_NLS_DEFAULT) in the nls
- settings and you need to give the default nls for the SMB server as
- CONFIG_SMB_NLS_REMOTE.
-
- The nls settings can be changed at mount time, if your smbmount
- supports that, using the codepage and iocharset parameters.
-
- smbmount from samba 2.2.0 or later supports this.
-
-config SMB_NLS_REMOTE
- string "Default Remote NLS Option"
- depends on SMB_NLS_DEFAULT
- default "cp437"
- help
- This setting allows you to specify a default value for which
- codepage the server uses. If this field is left blank no
- translations will be done by default. The local codepage/charset
- default to CONFIG_NLS_DEFAULT.
-
- The nls settings can be changed at mount time, if your smbmount
- supports that, using the codepage and iocharset parameters.
-
- smbmount from samba 2.2.0 or later supports this.
diff --git a/fs/smbfs/Makefile b/fs/smbfs/Makefile
deleted file mode 100644
index 4faf8c4..0000000
--- a/fs/smbfs/Makefile
+++ /dev/null
@@ -1,18 +0,0 @@
-#
-# Makefile for the linux smb-filesystem routines.
-#
-
-obj-$(CONFIG_SMB_FS) += smbfs.o
-
-smbfs-objs := proc.o dir.o cache.o sock.o inode.o file.o ioctl.o getopt.o \
- symlink.o smbiod.o request.o
-
-# If you want debugging output, you may add these flags to the EXTRA_CFLAGS
-# SMBFS_PARANOIA should normally be enabled.
-
-EXTRA_CFLAGS += -DSMBFS_PARANOIA
-#EXTRA_CFLAGS += -DSMBFS_DEBUG
-#EXTRA_CFLAGS += -DSMBFS_DEBUG_VERBOSE
-#EXTRA_CFLAGS += -DDEBUG_SMB_TIMESTAMP
-#EXTRA_CFLAGS += -Werror
-
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
deleted file mode 100644
index 8c177eb..0000000
--- a/fs/smbfs/cache.c
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * cache.c
- *
- * Copyright (C) 1997 by Bill Hawes
- *
- * Routines to support directory cacheing using the page cache.
- * This cache code is almost directly taken from ncpfs.
- *
- * Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/time.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/smb_fs.h>
-#include <linux/pagemap.h>
-#include <linux/net.h>
-
-#include <asm/page.h>
-
-#include "smb_debug.h"
-#include "proto.h"
-
-/*
- * Force the next attempt to use the cache to be a timeout.
- * If we can't find the page that's fine, it will cause a refresh.
- */
-void
-smb_invalid_dir_cache(struct inode * dir)
-{
- struct smb_sb_info *server = server_from_inode(dir);
- union smb_dir_cache *cache = NULL;
- struct page *page = NULL;
-
- page = grab_cache_page(&dir->i_data, 0);
- if (!page)
- goto out;
-
- if (!PageUptodate(page))
- goto out_unlock;
-
- cache = kmap(page);
- cache->head.time = jiffies - SMB_MAX_AGE(server);
-
- kunmap(page);
- SetPageUptodate(page);
-out_unlock:
- unlock_page(page);
- page_cache_release(page);
-out:
- return;
-}
-
-/*
- * Mark all dentries for 'parent' as invalid, forcing them to be re-read
- */
-void
-smb_invalidate_dircache_entries(struct dentry *parent)
-{
- struct smb_sb_info *server = server_from_dentry(parent);
- struct list_head *next;
- struct dentry *dentry;
-
- spin_lock(&dcache_lock);
- next = parent->d_subdirs.next;
- while (next != &parent->d_subdirs) {
- dentry = list_entry(next, struct dentry, d_u.d_child);
- dentry->d_fsdata = NULL;
- smb_age_dentry(server, dentry);
- next = next->next;
- }
- spin_unlock(&dcache_lock);
-}
-
-/*
- * dget, but require that fpos and parent matches what the dentry contains.
- * dentry is not known to be a valid pointer at entry.
- */
-struct dentry *
-smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
-{
- struct dentry *dent = dentry;
- struct list_head *next;
-
- if (d_validate(dent, parent)) {
- if (dent->d_name.len <= SMB_MAXNAMELEN &&
- (unsigned long)dent->d_fsdata == fpos) {
- if (!dent->d_inode) {
- dput(dent);
- dent = NULL;
- }
- return dent;
- }
- dput(dent);
- }
-
- /* If a pointer is invalid, we search the dentry. */
- spin_lock(&dcache_lock);
- next = parent->d_subdirs.next;
- while (next != &parent->d_subdirs) {
- dent = list_entry(next, struct dentry, d_u.d_child);
- if ((unsigned long)dent->d_fsdata == fpos) {
- if (dent->d_inode)
- dget_locked(dent);
- else
- dent = NULL;
- goto out_unlock;
- }
- next = next->next;
- }
- dent = NULL;
-out_unlock:
- spin_unlock(&dcache_lock);
- return dent;
-}
-
-
-/*
- * Create dentry/inode for this file and add it to the dircache.
- */
-int
-smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
- struct smb_cache_control *ctrl, struct qstr *qname,
- struct smb_fattr *entry)
-{
- struct dentry *newdent, *dentry = filp->f_path.dentry;
- struct inode *newino, *inode = dentry->d_inode;
- struct smb_cache_control ctl = *ctrl;
- int valid = 0;
- int hashed = 0;
- ino_t ino = 0;
-
- qname->hash = full_name_hash(qname->name, qname->len);
-
- if (dentry->d_op && dentry->d_op->d_hash)
- if (dentry->d_op->d_hash(dentry, qname) != 0)
- goto end_advance;
-
- newdent = d_lookup(dentry, qname);
-
- if (!newdent) {
- newdent = d_alloc(dentry, qname);
- if (!newdent)
- goto end_advance;
- } else {
- hashed = 1;
- memcpy((char *) newdent->d_name.name, qname->name,
- newdent->d_name.len);
- }
-
- if (!newdent->d_inode) {
- smb_renew_times(newdent);
- entry->f_ino = iunique(inode->i_sb, 2);
- newino = smb_iget(inode->i_sb, entry);
- if (newino) {
- smb_new_dentry(newdent);
- d_instantiate(newdent, newino);
- if (!hashed)
- d_rehash(newdent);
- }
- } else
- smb_set_inode_attr(newdent->d_inode, entry);
-
- if (newdent->d_inode) {
- ino = newdent->d_inode->i_ino;
- newdent->d_fsdata = (void *) ctl.fpos;
- smb_new_dentry(newdent);
- }
-
- if (ctl.idx >= SMB_DIRCACHE_SIZE) {
- if (ctl.page) {
- kunmap(ctl.page);
- SetPageUptodate(ctl.page);
- unlock_page(ctl.page);
- page_cache_release(ctl.page);
- }
- ctl.cache = NULL;
- ctl.idx -= SMB_DIRCACHE_SIZE;
- ctl.ofs += 1;
- ctl.page = grab_cache_page(&inode->i_data, ctl.ofs);
- if (ctl.page)
- ctl.cache = kmap(ctl.page);
- }
- if (ctl.cache) {
- ctl.cache->dentry[ctl.idx] = newdent;
- valid = 1;
- }
- dput(newdent);
-
-end_advance:
- if (!valid)
- ctl.valid = 0;
- if (!ctl.filled && (ctl.fpos == filp->f_pos)) {
- if (!ino)
- ino = find_inode_number(dentry, qname);
- if (!ino)
- ino = iunique(inode->i_sb, 2);
- ctl.filled = filldir(dirent, qname->name, qname->len,
- filp->f_pos, ino, DT_UNKNOWN);
- if (!ctl.filled)
- filp->f_pos += 1;
- }
- ctl.fpos += 1;
- ctl.idx += 1;
- *ctrl = ctl;
- return (ctl.valid || !ctl.filled);
-}
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
deleted file mode 100644
index f678d42..0000000
--- a/fs/smbfs/dir.c
+++ /dev/null
@@ -1,696 +0,0 @@
-/*
- * dir.c
- *
- * Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
- * Copyright (C) 1997 by Volker Lendecke
- *
- * Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/time.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/smp_lock.h>
-#include <linux/ctype.h>
-#include <linux/net.h>
-#include <linux/sched.h>
-
-#include <linux/smb_fs.h>
-#include <linux/smb_mount.h>
-#include <linux/smbno.h>
-
-#include "smb_debug.h"
-#include "proto.h"
-
-static int smb_readdir(struct file *, void *, filldir_t);
-static int smb_dir_open(struct inode *, struct file *);
-
-static struct dentry *smb_lookup(struct inode *, struct dentry *, struct nameidata *);
-static int smb_create(struct inode *, struct dentry *, int, struct nameidata *);
-static int smb_mkdir(struct inode *, struct dentry *, int);
-static int smb_rmdir(struct inode *, struct dentry *);
-static int smb_unlink(struct inode *, struct dentry *);
-static int smb_rename(struct inode *, struct dentry *,
- struct inode *, struct dentry *);
-static int smb_make_node(struct inode *,struct dentry *,int,dev_t);
-static int smb_link(struct dentry *, struct inode *, struct dentry *);
-
-const struct file_operations smb_dir_operations =
-{
- .llseek = generic_file_llseek,
- .read = generic_read_dir,
- .readdir = smb_readdir,
- .unlocked_ioctl = smb_ioctl,
- .open = smb_dir_open,
-};
-
-const struct inode_operations smb_dir_inode_operations =
-{
- .create = smb_create,
- .lookup = smb_lookup,
- .unlink = smb_unlink,
- .mkdir = smb_mkdir,
- .rmdir = smb_rmdir,
- .rename = smb_rename,
- .getattr = smb_getattr,
- .setattr = smb_notify_change,
-};
-
-const struct inode_operations smb_dir_inode_operations_unix =
-{
- .create = smb_create,
- .lookup = smb_lookup,
- .unlink = smb_unlink,
- .mkdir = smb_mkdir,
- .rmdir = smb_rmdir,
- .rename = smb_rename,
- .getattr = smb_getattr,
- .setattr = smb_notify_change,
- .symlink = smb_symlink,
- .mknod = smb_make_node,
- .link = smb_link,
-};
-
-/*
- * Read a directory, using filldir to fill the dirent memory.
- * smb_proc_readdir does the actual reading from the smb server.
- *
- * The cache code is almost directly taken from ncpfs
- */
-static int
-smb_readdir(struct file *filp, void *dirent, filldir_t filldir)
-{
- struct dentry *dentry = filp->f_path.dentry;
- struct inode *dir = dentry->d_inode;
- struct smb_sb_info *server = server_from_dentry(dentry);
- union smb_dir_cache *cache = NULL;
- struct smb_cache_control ctl;
- struct page *page = NULL;
- int result;
-
- ctl.page = NULL;
- ctl.cache = NULL;
-
- VERBOSE("reading %s/%s, f_pos=%d\n",
- DENTRY_PATH(dentry), (int) filp->f_pos);
-
- result = 0;
-
- lock_kernel();
-
- switch ((unsigned int) filp->f_pos) {
- case 0:
- if (filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR) < 0)
- goto out;
- filp->f_pos = 1;
- /* fallthrough */
- case 1:
- if (filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR) < 0)
- goto out;
- filp->f_pos = 2;
- }
-
- /*
- * Make sure our inode is up-to-date.
- */
- result = smb_revalidate_inode(dentry);
- if (result)
- goto out;
-
-
- page = grab_cache_page(&dir->i_data, 0);
- if (!page)
- goto read_really;
-
- ctl.cache = cache = kmap(page);
- ctl.head = cache->head;
-
- if (!PageUptodate(page) || !ctl.head.eof) {
- VERBOSE("%s/%s, page uptodate=%d, eof=%d\n",
- DENTRY_PATH(dentry), PageUptodate(page),ctl.head.eof);
- goto init_cache;
- }
-
- if (filp->f_pos == 2) {
- if (jiffies - ctl.head.time >= SMB_MAX_AGE(server))
- goto init_cache;
-
- /*
- * N.B. ncpfs checks mtime of dentry too here, we don't.
- * 1. common smb servers do not update mtime on dir changes
- * 2. it requires an extra smb request
- * (revalidate has the same timeout as ctl.head.time)
- *
- * Instead smbfs invalidates its own cache on local changes
- * and remote changes are not seen until timeout.
- */
- }
-
- if (filp->f_pos > ctl.head.end)
- goto finished;
-
- ctl.fpos = filp->f_pos + (SMB_DIRCACHE_START - 2);
- ctl.ofs = ctl.fpos / SMB_DIRCACHE_SIZE;
- ctl.idx = ctl.fpos % SMB_DIRCACHE_SIZE;
-
- for (;;) {
- if (ctl.ofs != 0) {
- ctl.page = find_lock_page(&dir->i_data, ctl.ofs);
- if (!ctl.page)
- goto invalid_cache;
- ctl.cache = kmap(ctl.page);
- if (!PageUptodate(ctl.page))
- goto invalid_cache;
- }
- while (ctl.idx < SMB_DIRCACHE_SIZE) {
- struct dentry *dent;
- int res;
-
- dent = smb_dget_fpos(ctl.cache->dentry[ctl.idx],
- dentry, filp->f_pos);
- if (!dent)
- goto invalid_cache;
-
- res = filldir(dirent, dent->d_name.name,
- dent->d_name.len, filp->f_pos,
- dent->d_inode->i_ino, DT_UNKNOWN);
- dput(dent);
- if (res)
- goto finished;
- filp->f_pos += 1;
- ctl.idx += 1;
- if (filp->f_pos > ctl.head.end)
- goto finished;
- }
- if (ctl.page) {
- kunmap(ctl.page);
- SetPageUptodate(ctl.page);
- unlock_page(ctl.page);
- page_cache_release(ctl.page);
- ctl.page = NULL;
- }
- ctl.idx = 0;
- ctl.ofs += 1;
- }
-invalid_cache:
- if (ctl.page) {
- kunmap(ctl.page);
- unlock_page(ctl.page);
- page_cache_release(ctl.page);
- ctl.page = NULL;
- }
- ctl.cache = cache;
-init_cache:
- smb_invalidate_dircache_entries(dentry);
- ctl.head.time = jiffies;
- ctl.head.eof = 0;
- ctl.fpos = 2;
- ctl.ofs = 0;
- ctl.idx = SMB_DIRCACHE_START;
- ctl.filled = 0;
- ctl.valid = 1;
-read_really:
- result = server->ops->readdir(filp, dirent, filldir, &ctl);
- if (result == -ERESTARTSYS && page)
- ClearPageUptodate(page);
- if (ctl.idx == -1)
- goto invalid_cache; /* retry */
- ctl.head.end = ctl.fpos - 1;
- ctl.head.eof = ctl.valid;
-finished:
- if (page) {
- cache->head = ctl.head;
- kunmap(page);
- if (result != -ERESTARTSYS)
- SetPageUptodate(page);
- unlock_page(page);
- page_cache_release(page);
- }
- if (ctl.page) {
- kunmap(ctl.page);
- SetPageUptodate(ctl.page);
- unlock_page(ctl.page);
- page_cache_release(ctl.page);
- }
-out:
- unlock_kernel();
- return result;
-}
-
-static int
-smb_dir_open(struct inode *dir, struct file *file)
-{
- struct dentry *dentry = file->f_path.dentry;
- struct smb_sb_info *server;
- int error = 0;
-
- VERBOSE("(%s/%s)\n", dentry->d_parent->d_name.name,
- file->f_path.dentry->d_name.name);
-
- /*
- * Directory timestamps in the core protocol aren't updated
- * when a file is added, so we give them a very short TTL.
- */
- lock_kernel();
- server = server_from_dentry(dentry);
- if (server->opt.protocol < SMB_PROTOCOL_LANMAN2) {
- unsigned long age = jiffies - SMB_I(dir)->oldmtime;
- if (age > 2*HZ)
- smb_invalid_dir_cache(dir);
- }
-
- /*
- * Note: in order to allow the smbmount process to open the
- * mount point, we only revalidate if the connection is valid or
- * if the process is trying to access something other than the root.
- */
- if (server->state == CONN_VALID || !IS_ROOT(dentry))
- error = smb_revalidate_inode(dentry);
- unlock_kernel();
- return error;
-}
-
-/*
- * Dentry operations routines
- */
-static int smb_lookup_validate(struct dentry *, struct nameidata *);
-static int smb_hash_dentry(struct dentry *, struct qstr *);
-static int smb_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
-static int smb_delete_dentry(struct dentry *);
-
-static const struct dentry_operations smbfs_dentry_operations =
-{
- .d_revalidate = smb_lookup_validate,
- .d_hash = smb_hash_dentry,
- .d_compare = smb_compare_dentry,
- .d_delete = smb_delete_dentry,
-};
-
-static const struct dentry_operations smbfs_dentry_operations_case =
-{
- .d_revalidate = smb_lookup_validate,
- .d_delete = smb_delete_dentry,
-};
-
-
-/*
- * This is the callback when the dcache has a lookup hit.
- */
-static int
-smb_lookup_validate(struct dentry * dentry, struct nameidata *nd)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
- struct inode * inode = dentry->d_inode;
- unsigned long age = jiffies - dentry->d_time;
- int valid;
-
- /*
- * The default validation is based on dentry age:
- * we believe in dentries for a few seconds. (But each
- * successful server lookup renews the timestamp.)
- */
- valid = (age <= SMB_MAX_AGE(server));
-#ifdef SMBFS_DEBUG_VERBOSE
- if (!valid)
- VERBOSE("%s/%s not valid, age=%lu\n",
- DENTRY_PATH(dentry), age);
-#endif
-
- if (inode) {
- lock_kernel();
- if (is_bad_inode(inode)) {
- PARANOIA("%s/%s has dud inode\n", DENTRY_PATH(dentry));
- valid = 0;
- } else if (!valid)
- valid = (smb_revalidate_inode(dentry) == 0);
- unlock_kernel();
- } else {
- /*
- * What should we do for negative dentries?
- */
- }
- return valid;
-}
-
-static int
-smb_hash_dentry(struct dentry *dir, struct qstr *this)
-{
- unsigned long hash;
- int i;
-
- hash = init_name_hash();
- for (i=0; i < this->len ; i++)
- hash = partial_name_hash(tolower(this->name[i]), hash);
- this->hash = end_name_hash(hash);
-
- return 0;
-}
-
-static int
-smb_compare_dentry(struct dentry *dir, struct qstr *a, struct qstr *b)
-{
- int i, result = 1;
-
- if (a->len != b->len)
- goto out;
- for (i=0; i < a->len; i++) {
- if (tolower(a->name[i]) != tolower(b->name[i]))
- goto out;
- }
- result = 0;
-out:
- return result;
-}
-
-/*
- * This is the callback from dput() when d_count is going to 0.
- * We use this to unhash dentries with bad inodes.
- */
-static int
-smb_delete_dentry(struct dentry * dentry)
-{
- if (dentry->d_inode) {
- if (is_bad_inode(dentry->d_inode)) {
- PARANOIA("bad inode, unhashing %s/%s\n",
- DENTRY_PATH(dentry));
- return 1;
- }
- } else {
- /* N.B. Unhash negative dentries? */
- }
- return 0;
-}
-
-/*
- * Initialize a new dentry
- */
-void
-smb_new_dentry(struct dentry *dentry)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
-
- if (server->mnt->flags & SMB_MOUNT_CASE)
- dentry->d_op = &smbfs_dentry_operations_case;
- else
- dentry->d_op = &smbfs_dentry_operations;
- dentry->d_time = jiffies;
-}
-
-
-/*
- * Whenever a lookup succeeds, we know the parent directories
- * are all valid, so we want to update the dentry timestamps.
- * N.B. Move this to dcache?
- */
-void
-smb_renew_times(struct dentry * dentry)
-{
- dget(dentry);
- dentry->d_time = jiffies;
-
- while (!IS_ROOT(dentry)) {
- struct dentry *parent = dget_parent(dentry);
- dput(dentry);
- dentry = parent;
-
- dentry->d_time = jiffies;
- }
- dput(dentry);
-}
-
-static struct dentry *
-smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
-{
- struct smb_fattr finfo;
- struct inode *inode;
- int error;
- struct smb_sb_info *server;
-
- error = -ENAMETOOLONG;
- if (dentry->d_name.len > SMB_MAXNAMELEN)
- goto out;
-
- /* Do not allow lookup of names with backslashes in */
- error = -EINVAL;
- if (memchr(dentry->d_name.name, '\\', dentry->d_name.len))
- goto out;
-
- lock_kernel();
- error = smb_proc_getattr(dentry, &finfo);
-#ifdef SMBFS_PARANOIA
- if (error && error != -ENOENT)
- PARANOIA("find %s/%s failed, error=%d\n",
- DENTRY_PATH(dentry), error);
-#endif
-
- inode = NULL;
- if (error == -ENOENT)
- goto add_entry;
- if (!error) {
- error = -EACCES;
- finfo.f_ino = iunique(dentry->d_sb, 2);
- inode = smb_iget(dir->i_sb, &finfo);
- if (inode) {
- add_entry:
- server = server_from_dentry(dentry);
- if (server->mnt->flags & SMB_MOUNT_CASE)
- dentry->d_op = &smbfs_dentry_operations_case;
- else
- dentry->d_op = &smbfs_dentry_operations;
-
- d_add(dentry, inode);
- smb_renew_times(dentry);
- error = 0;
- }
- }
- unlock_kernel();
-out:
- return ERR_PTR(error);
-}
-
-/*
- * This code is common to all routines creating a new inode.
- */
-static int
-smb_instantiate(struct dentry *dentry, __u16 fileid, int have_id)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
- struct inode *inode;
- int error;
- struct smb_fattr fattr;
-
- VERBOSE("file %s/%s, fileid=%u\n", DENTRY_PATH(dentry), fileid);
-
- error = smb_proc_getattr(dentry, &fattr);
- if (error)
- goto out_close;
-
- smb_renew_times(dentry);
- fattr.f_ino = iunique(dentry->d_sb, 2);
- inode = smb_iget(dentry->d_sb, &fattr);
- if (!inode)
- goto out_no_inode;
-
- if (have_id) {
- struct smb_inode_info *ei = SMB_I(inode);
- ei->fileid = fileid;
- ei->access = SMB_O_RDWR;
- ei->open = server->generation;
- }
- d_instantiate(dentry, inode);
-out:
- return error;
-
-out_no_inode:
- error = -EACCES;
-out_close:
- if (have_id) {
- PARANOIA("%s/%s failed, error=%d, closing %u\n",
- DENTRY_PATH(dentry), error, fileid);
- smb_close_fileid(dentry, fileid);
- }
- goto out;
-}
-
-/* N.B. How should the mode argument be used? */
-static int
-smb_create(struct inode *dir, struct dentry *dentry, int mode,
- struct nameidata *nd)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
- __u16 fileid;
- int error;
- struct iattr attr;
-
- VERBOSE("creating %s/%s, mode=%d\n", DENTRY_PATH(dentry), mode);
-
- lock_kernel();
- smb_invalid_dir_cache(dir);
- error = smb_proc_create(dentry, 0, get_seconds(), &fileid);
- if (!error) {
- if (server->opt.capabilities & SMB_CAP_UNIX) {
- /* Set attributes for new file */
- attr.ia_valid = ATTR_MODE;
- attr.ia_mode = mode;
- error = smb_proc_setattr_unix(dentry, &attr, 0, 0);
- }
- error = smb_instantiate(dentry, fileid, 1);
- } else {
- PARANOIA("%s/%s failed, error=%d\n",
- DENTRY_PATH(dentry), error);
- }
- unlock_kernel();
- return error;
-}
-
-/* N.B. How should the mode argument be used? */
-static int
-smb_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
- int error;
- struct iattr attr;
-
- lock_kernel();
- smb_invalid_dir_cache(dir);
- error = smb_proc_mkdir(dentry);
- if (!error) {
- if (server->opt.capabilities & SMB_CAP_UNIX) {
- /* Set attributes for new directory */
- attr.ia_valid = ATTR_MODE;
- attr.ia_mode = mode;
- error = smb_proc_setattr_unix(dentry, &attr, 0, 0);
- }
- error = smb_instantiate(dentry, 0, 0);
- }
- unlock_kernel();
- return error;
-}
-
-static int
-smb_rmdir(struct inode *dir, struct dentry *dentry)
-{
- struct inode *inode = dentry->d_inode;
- int error;
-
- /*
- * Close the directory if it's open.
- */
- lock_kernel();
- smb_close(inode);
-
- /*
- * Check that nobody else is using the directory..
- */
- error = -EBUSY;
- if (!d_unhashed(dentry))
- goto out;
-
- smb_invalid_dir_cache(dir);
- error = smb_proc_rmdir(dentry);
-
-out:
- unlock_kernel();
- return error;
-}
-
-static int
-smb_unlink(struct inode *dir, struct dentry *dentry)
-{
- int error;
-
- /*
- * Close the file if it's open.
- */
- lock_kernel();
- smb_close(dentry->d_inode);
-
- smb_invalid_dir_cache(dir);
- error = smb_proc_unlink(dentry);
- if (!error)
- smb_renew_times(dentry);
- unlock_kernel();
- return error;
-}
-
-static int
-smb_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
-{
- int error;
-
- /*
- * Close any open files, and check whether to delete the
- * target before attempting the rename.
- */
- lock_kernel();
- if (old_dentry->d_inode)
- smb_close(old_dentry->d_inode);
- if (new_dentry->d_inode) {
- smb_close(new_dentry->d_inode);
- error = smb_proc_unlink(new_dentry);
- if (error) {
- VERBOSE("unlink %s/%s, error=%d\n",
- DENTRY_PATH(new_dentry), error);
- goto out;
- }
- /* FIXME */
- d_delete(new_dentry);
- }
-
- smb_invalid_dir_cache(old_dir);
- smb_invalid_dir_cache(new_dir);
- error = smb_proc_mv(old_dentry, new_dentry);
- if (!error) {
- smb_renew_times(old_dentry);
- smb_renew_times(new_dentry);
- }
-out:
- unlock_kernel();
- return error;
-}
-
-/*
- * FIXME: samba servers won't let you create device nodes unless uid/gid
- * matches the connection credentials (and we don't know which those are ...)
- */
-static int
-smb_make_node(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
-{
- int error;
- struct iattr attr;
-
- attr.ia_valid = ATTR_MODE | ATTR_UID | ATTR_GID;
- attr.ia_mode = mode;
- current_euid_egid(&attr.ia_uid, &attr.ia_gid);
-
- if (!new_valid_dev(dev))
- return -EINVAL;
-
- smb_invalid_dir_cache(dir);
- error = smb_proc_setattr_unix(dentry, &attr, MAJOR(dev), MINOR(dev));
- if (!error) {
- error = smb_instantiate(dentry, 0, 0);
- }
- return error;
-}
-
-/*
- * dentry = existing file
- * new_dentry = new file
- */
-static int
-smb_link(struct dentry *dentry, struct inode *dir, struct dentry *new_dentry)
-{
- int error;
-
- DEBUG1("smb_link old=%s/%s new=%s/%s\n",
- DENTRY_PATH(dentry), DENTRY_PATH(new_dentry));
- smb_invalid_dir_cache(dir);
- error = smb_proc_link(server_from_dentry(dentry), dentry, new_dentry);
- if (!error) {
- smb_renew_times(dentry);
- error = smb_instantiate(new_dentry, 0, 0);
- }
- return error;
-}
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
deleted file mode 100644
index 8e187a0..0000000
--- a/fs/smbfs/file.c
+++ /dev/null
@@ -1,454 +0,0 @@
-/*
- * file.c
- *
- * Copyright (C) 1995, 1996, 1997 by Paal-Kr. Engstad and Volker Lendecke
- * Copyright (C) 1997 by Volker Lendecke
- *
- * Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/time.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/stat.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/smp_lock.h>
-#include <linux/net.h>
-#include <linux/aio.h>
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
-#include <linux/smbno.h>
-#include <linux/smb_fs.h>
-
-#include "smb_debug.h"
-#include "proto.h"
-
-static int
-smb_fsync(struct file *file, int datasync)
-{
- struct dentry *dentry = file->f_path.dentry;
- struct smb_sb_info *server = server_from_dentry(dentry);
- int result;
-
- VERBOSE("sync file %s/%s\n", DENTRY_PATH(dentry));
-
- /*
- * The VFS will writepage() all dirty pages for us, but we
- * should send a SMBflush to the server, letting it know that
- * we want things synchronized with actual storage.
- *
- * Note: this function requires all pages to have been written already
- * (should be ok with writepage_sync)
- */
- result = smb_proc_flush(server, SMB_I(dentry->d_inode)->fileid);
- return result;
-}
-
-/*
- * Read a page synchronously.
- */
-static int
-smb_readpage_sync(struct dentry *dentry, struct page *page)
-{
- char *buffer = kmap(page);
- loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
- struct smb_sb_info *server = server_from_dentry(dentry);
- unsigned int rsize = smb_get_rsize(server);
- int count = PAGE_SIZE;
- int result;
-
- VERBOSE("file %s/%s, count=%d@%Ld, rsize=%d\n",
- DENTRY_PATH(dentry), count, offset, rsize);
-
- result = smb_open(dentry, SMB_O_RDONLY);
- if (result < 0)
- goto io_error;
-
- do {
- if (count < rsize)
- rsize = count;
-
- result = server->ops->read(dentry->d_inode,offset,rsize,buffer);
- if (result < 0)
- goto io_error;
-
- count -= result;
- offset += result;
- buffer += result;
- dentry->d_inode->i_atime =
- current_fs_time(dentry->d_inode->i_sb);
- if (result < rsize)
- break;
- } while (count);
-
- memset(buffer, 0, count);
- flush_dcache_page(page);
- SetPageUptodate(page);
- result = 0;
-
-io_error:
- kunmap(page);
- unlock_page(page);
- return result;
-}
-
-/*
- * We are called with the page locked and we unlock it when done.
- */
-static int
-smb_readpage(struct file *file, struct page *page)
-{
- int error;
- struct dentry *dentry = file->f_path.dentry;
-
- page_cache_get(page);
- error = smb_readpage_sync(dentry, page);
- page_cache_release(page);
- return error;
-}
-
-/*
- * Write a page synchronously.
- * Offset is the data offset within the page.
- */
-static int
-smb_writepage_sync(struct inode *inode, struct page *page,
- unsigned long pageoffset, unsigned int count)
-{
- loff_t offset;
- char *buffer = kmap(page) + pageoffset;
- struct smb_sb_info *server = server_from_inode(inode);
- unsigned int wsize = smb_get_wsize(server);
- int ret = 0;
-
- offset = ((loff_t)page->index << PAGE_CACHE_SHIFT) + pageoffset;
- VERBOSE("file ino=%ld, fileid=%d, count=%d@%Ld, wsize=%d\n",
- inode->i_ino, SMB_I(inode)->fileid, count, offset, wsize);
-
- do {
- int write_ret;
-
- if (count < wsize)
- wsize = count;
-
- write_ret = server->ops->write(inode, offset, wsize, buffer);
- if (write_ret < 0) {
- PARANOIA("failed write, wsize=%d, write_ret=%d\n",
- wsize, write_ret);
- ret = write_ret;
- break;
- }
- /* N.B. what if result < wsize?? */
-#ifdef SMBFS_PARANOIA
- if (write_ret < wsize)
- PARANOIA("short write, wsize=%d, write_ret=%d\n",
- wsize, write_ret);
-#endif
- buffer += wsize;
- offset += wsize;
- count -= wsize;
- /*
- * Update the inode now rather than waiting for a refresh.
- */
- inode->i_mtime = inode->i_atime = current_fs_time(inode->i_sb);
- SMB_I(inode)->flags |= SMB_F_LOCALWRITE;
- if (offset > inode->i_size)
- inode->i_size = offset;
- } while (count);
-
- kunmap(page);
- return ret;
-}
-
-/*
- * Write a page to the server. This will be used for NFS swapping only
- * (for now), and we currently do this synchronously only.
- *
- * We are called with the page locked and we unlock it when done.
- */
-static int
-smb_writepage(struct page *page, struct writeback_control *wbc)
-{
- struct address_space *mapping = page->mapping;
- struct inode *inode;
- unsigned long end_index;
- unsigned offset = PAGE_CACHE_SIZE;
- int err;
-
- BUG_ON(!mapping);
- inode = mapping->host;
- BUG_ON(!inode);
-
- end_index = inode->i_size >> PAGE_CACHE_SHIFT;
-
- /* easy case */
- if (page->index < end_index)
- goto do_it;
- /* things got complicated... */
- offset = inode->i_size & (PAGE_CACHE_SIZE-1);
- /* OK, are we completely out? */
- if (page->index >= end_index+1 || !offset)
- return 0; /* truncated - don't care */
-do_it:
- page_cache_get(page);
- err = smb_writepage_sync(inode, page, 0, offset);
- SetPageUptodate(page);
- unlock_page(page);
- page_cache_release(page);
- return err;
-}
-
-static int
-smb_updatepage(struct file *file, struct page *page, unsigned long offset,
- unsigned int count)
-{
- struct dentry *dentry = file->f_path.dentry;
-
- DEBUG1("(%s/%s %d@%lld)\n", DENTRY_PATH(dentry), count,
- ((unsigned long long)page->index << PAGE_CACHE_SHIFT) + offset);
-
- return smb_writepage_sync(dentry->d_inode, page, offset, count);
-}
-
-static ssize_t
-smb_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
-{
- struct file * file = iocb->ki_filp;
- struct dentry * dentry = file->f_path.dentry;
- ssize_t status;
-
- VERBOSE("file %s/%s, count=%lu@%lu\n", DENTRY_PATH(dentry),
- (unsigned long) iocb->ki_left, (unsigned long) pos);
-
- status = smb_revalidate_inode(dentry);
- if (status) {
- PARANOIA("%s/%s validation failed, error=%Zd\n",
- DENTRY_PATH(dentry), status);
- goto out;
- }
-
- VERBOSE("before read, size=%ld, flags=%x, atime=%ld\n",
- (long)dentry->d_inode->i_size,
- dentry->d_inode->i_flags, dentry->d_inode->i_atime.tv_sec);
-
- status = generic_file_aio_read(iocb, iov, nr_segs, pos);
-out:
- return status;
-}
-
-static int
-smb_file_mmap(struct file * file, struct vm_area_struct * vma)
-{
- struct dentry * dentry = file->f_path.dentry;
- int status;
-
- VERBOSE("file %s/%s, address %lu - %lu\n",
- DENTRY_PATH(dentry), vma->vm_start, vma->vm_end);
-
- status = smb_revalidate_inode(dentry);
- if (status) {
- PARANOIA("%s/%s validation failed, error=%d\n",
- DENTRY_PATH(dentry), status);
- goto out;
- }
- status = generic_file_mmap(file, vma);
-out:
- return status;
-}
-
-static ssize_t
-smb_file_splice_read(struct file *file, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t count,
- unsigned int flags)
-{
- struct dentry *dentry = file->f_path.dentry;
- ssize_t status;
-
- VERBOSE("file %s/%s, pos=%Ld, count=%lu\n",
- DENTRY_PATH(dentry), *ppos, count);
-
- status = smb_revalidate_inode(dentry);
- if (status) {
- PARANOIA("%s/%s validation failed, error=%Zd\n",
- DENTRY_PATH(dentry), status);
- goto out;
- }
- status = generic_file_splice_read(file, ppos, pipe, count, flags);
-out:
- return status;
-}
-
-/*
- * This does the "real" work of the write. The generic routine has
- * allocated the page, locked it, done all the page alignment stuff
- * calculations etc. Now we should just copy the data from user
- * space and write it back to the real medium..
- *
- * If the writer ends up delaying the write, the writer needs to
- * increment the page use counts until he is done with the page.
- */
-static int smb_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata)
-{
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
- *pagep = grab_cache_page_write_begin(mapping, index, flags);
- if (!*pagep)
- return -ENOMEM;
- return 0;
-}
-
-static int smb_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
-{
- int status;
- unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
-
- lock_kernel();
- status = smb_updatepage(file, page, offset, copied);
- unlock_kernel();
-
- if (!status) {
- if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
- SetPageUptodate(page);
- status = copied;
- }
-
- unlock_page(page);
- page_cache_release(page);
-
- return status;
-}
-
-const struct address_space_operations smb_file_aops = {
- .readpage = smb_readpage,
- .writepage = smb_writepage,
- .write_begin = smb_write_begin,
- .write_end = smb_write_end,
-};
-
-/*
- * Write to a file (through the page cache).
- */
-static ssize_t
-smb_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
-{
- struct file * file = iocb->ki_filp;
- struct dentry * dentry = file->f_path.dentry;
- ssize_t result;
-
- VERBOSE("file %s/%s, count=%lu@%lu\n",
- DENTRY_PATH(dentry),
- (unsigned long) iocb->ki_left, (unsigned long) pos);
-
- result = smb_revalidate_inode(dentry);
- if (result) {
- PARANOIA("%s/%s validation failed, error=%Zd\n",
- DENTRY_PATH(dentry), result);
- goto out;
- }
-
- result = smb_open(dentry, SMB_O_WRONLY);
- if (result)
- goto out;
-
- if (iocb->ki_left > 0) {
- result = generic_file_aio_write(iocb, iov, nr_segs, pos);
- VERBOSE("pos=%ld, size=%ld, mtime=%ld, atime=%ld\n",
- (long) file->f_pos, (long) dentry->d_inode->i_size,
- dentry->d_inode->i_mtime.tv_sec,
- dentry->d_inode->i_atime.tv_sec);
- }
-out:
- return result;
-}
-
-static int
-smb_file_open(struct inode *inode, struct file * file)
-{
- int result;
- struct dentry *dentry = file->f_path.dentry;
- int smb_mode = (file->f_mode & O_ACCMODE) - 1;
-
- lock_kernel();
- result = smb_open(dentry, smb_mode);
- if (result)
- goto out;
- SMB_I(inode)->openers++;
-out:
- unlock_kernel();
- return result;
-}
-
-static int
-smb_file_release(struct inode *inode, struct file * file)
-{
- lock_kernel();
- if (!--SMB_I(inode)->openers) {
- /* We must flush any dirty pages now as we won't be able to
- write anything after close. mmap can trigger this.
- "openers" should perhaps include mmap'ers ... */
- filemap_write_and_wait(inode->i_mapping);
- smb_close(inode);
- }
- unlock_kernel();
- return 0;
-}
-
-/*
- * Check whether the required access is compatible with
- * an inode's permission. SMB doesn't recognize superuser
- * privileges, so we need our own check for this.
- */
-static int
-smb_file_permission(struct inode *inode, int mask)
-{
- int mode = inode->i_mode;
- int error = 0;
-
- VERBOSE("mode=%x, mask=%x\n", mode, mask);
-
- /* Look at user permissions */
- mode >>= 6;
- if (mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC))
- error = -EACCES;
- return error;
-}
-
-static loff_t smb_remote_llseek(struct file *file, loff_t offset, int origin)
-{
- loff_t ret;
- lock_kernel();
- ret = generic_file_llseek_unlocked(file, offset, origin);
- unlock_kernel();
- return ret;
-}
-
-const struct file_operations smb_file_operations =
-{
- .llseek = smb_remote_llseek,
- .read = do_sync_read,
- .aio_read = smb_file_aio_read,
- .write = do_sync_write,
- .aio_write = smb_file_aio_write,
- .unlocked_ioctl = smb_ioctl,
- .mmap = smb_file_mmap,
- .open = smb_file_open,
- .release = smb_file_release,
- .fsync = smb_fsync,
- .splice_read = smb_file_splice_read,
-};
-
-const struct inode_operations smb_file_inode_operations =
-{
- .permission = smb_file_permission,
- .getattr = smb_getattr,
- .setattr = smb_notify_change,
-};
diff --git a/fs/smbfs/getopt.c b/fs/smbfs/getopt.c
deleted file mode 100644
index 7ae0f52..0000000
--- a/fs/smbfs/getopt.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * getopt.c
- */
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/net.h>
-
-#include "getopt.h"
-
-/**
- * smb_getopt - option parser
- * @caller: name of the caller, for error messages
- * @options: the options string
- * @opts: an array of &struct option entries controlling parser operations
- * @optopt: output; will contain the current option
- * @optarg: output; will contain the value (if one exists)
- * @flag: output; may be NULL; should point to a long for or'ing flags
- * @value: output; may be NULL; will be overwritten with the integer value
- * of the current argument.
- *
- * Helper to parse options on the format used by mount ("a=b,c=d,e,f").
- * Returns opts->val if a matching entry in the 'opts' array is found,
- * 0 when no more tokens are found, -1 if an error is encountered.
- */
-int smb_getopt(char *caller, char **options, struct option *opts,
- char **optopt, char **optarg, unsigned long *flag,
- unsigned long *value)
-{
- char *token;
- char *val;
- int i;
-
- do {
- if ((token = strsep(options, ",")) == NULL)
- return 0;
- } while (*token == '\0');
- *optopt = token;
-
- *optarg = NULL;
- if ((val = strchr (token, '=')) != NULL) {
- *val++ = 0;
- if (value)
- *value = simple_strtoul(val, NULL, 0);
- *optarg = val;
- }
-
- for (i = 0; opts[i].name != NULL; i++) {
- if (!strcmp(opts[i].name, token)) {
- if (!opts[i].flag && (!val || !*val)) {
- printk("%s: the %s option requires an argument\n",
- caller, token);
- return -1;
- }
-
- if (flag && opts[i].flag)
- *flag |= opts[i].flag;
-
- return opts[i].val;
- }
- }
- printk("%s: Unrecognized mount option %s\n", caller, token);
- return -1;
-}
diff --git a/fs/smbfs/getopt.h b/fs/smbfs/getopt.h
deleted file mode 100644
index 146219a..0000000
--- a/fs/smbfs/getopt.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _LINUX_GETOPT_H
-#define _LINUX_GETOPT_H
-
-struct option {
- const char *name;
- unsigned long flag;
- int val;
-};
-
-extern int smb_getopt(char *caller, char **options, struct option *opts,
- char **optopt, char **optarg, unsigned long *flag,
- unsigned long *value);
-
-#endif /* _LINUX_GETOPT_H */
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
deleted file mode 100644
index f6e9ee5..0000000
--- a/fs/smbfs/inode.c
+++ /dev/null
@@ -1,843 +0,0 @@
-/*
- * inode.c
- *
- * Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
- * Copyright (C) 1997 by Volker Lendecke
- *
- * Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/module.h>
-#include <linux/time.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/file.h>
-#include <linux/dcache.h>
-#include <linux/smp_lock.h>
-#include <linux/nls.h>
-#include <linux/seq_file.h>
-#include <linux/mount.h>
-#include <linux/net.h>
-#include <linux/vfs.h>
-#include <linux/highuid.h>
-#include <linux/sched.h>
-#include <linux/smb_fs.h>
-#include <linux/smbno.h>
-#include <linux/smb_mount.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include "smb_debug.h"
-#include "getopt.h"
-#include "proto.h"
-
-/* Always pick a default string */
-#ifdef CONFIG_SMB_NLS_REMOTE
-#define SMB_NLS_REMOTE CONFIG_SMB_NLS_REMOTE
-#else
-#define SMB_NLS_REMOTE ""
-#endif
-
-#define SMB_TTL_DEFAULT 1000
-
-static void smb_evict_inode(struct inode *);
-static void smb_put_super(struct super_block *);
-static int smb_statfs(struct dentry *, struct kstatfs *);
-static int smb_show_options(struct seq_file *, struct vfsmount *);
-
-static struct kmem_cache *smb_inode_cachep;
-
-static struct inode *smb_alloc_inode(struct super_block *sb)
-{
- struct smb_inode_info *ei;
- ei = (struct smb_inode_info *)kmem_cache_alloc(smb_inode_cachep, GFP_KERNEL);
- if (!ei)
- return NULL;
- return &ei->vfs_inode;
-}
-
-static void smb_destroy_inode(struct inode *inode)
-{
- kmem_cache_free(smb_inode_cachep, SMB_I(inode));
-}
-
-static void init_once(void *foo)
-{
- struct smb_inode_info *ei = (struct smb_inode_info *) foo;
-
- inode_init_once(&ei->vfs_inode);
-}
-
-static int init_inodecache(void)
-{
- smb_inode_cachep = kmem_cache_create("smb_inode_cache",
- sizeof(struct smb_inode_info),
- 0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
- init_once);
- if (smb_inode_cachep == NULL)
- return -ENOMEM;
- return 0;
-}
-
-static void destroy_inodecache(void)
-{
- kmem_cache_destroy(smb_inode_cachep);
-}
-
-static int smb_remount(struct super_block *sb, int *flags, char *data)
-{
- *flags |= MS_NODIRATIME;
- return 0;
-}
-
-static const struct super_operations smb_sops =
-{
- .alloc_inode = smb_alloc_inode,
- .destroy_inode = smb_destroy_inode,
- .drop_inode = generic_delete_inode,
- .evict_inode = smb_evict_inode,
- .put_super = smb_put_super,
- .statfs = smb_statfs,
- .show_options = smb_show_options,
- .remount_fs = smb_remount,
-};
-
-
-/* We are always generating a new inode here */
-struct inode *
-smb_iget(struct super_block *sb, struct smb_fattr *fattr)
-{
- struct smb_sb_info *server = SMB_SB(sb);
- struct inode *result;
-
- DEBUG1("smb_iget: %p\n", fattr);
-
- result = new_inode(sb);
- if (!result)
- return result;
- result->i_ino = fattr->f_ino;
- SMB_I(result)->open = 0;
- SMB_I(result)->fileid = 0;
- SMB_I(result)->access = 0;
- SMB_I(result)->flags = 0;
- SMB_I(result)->closed = 0;
- SMB_I(result)->openers = 0;
- smb_set_inode_attr(result, fattr);
- if (S_ISREG(result->i_mode)) {
- result->i_op = &smb_file_inode_operations;
- result->i_fop = &smb_file_operations;
- result->i_data.a_ops = &smb_file_aops;
- } else if (S_ISDIR(result->i_mode)) {
- if (server->opt.capabilities & SMB_CAP_UNIX)
- result->i_op = &smb_dir_inode_operations_unix;
- else
- result->i_op = &smb_dir_inode_operations;
- result->i_fop = &smb_dir_operations;
- } else if (S_ISLNK(result->i_mode)) {
- result->i_op = &smb_link_inode_operations;
- } else {
- init_special_inode(result, result->i_mode, fattr->f_rdev);
- }
- insert_inode_hash(result);
- return result;
-}
-
-/*
- * Copy the inode data to a smb_fattr structure.
- */
-void
-smb_get_inode_attr(struct inode *inode, struct smb_fattr *fattr)
-{
- memset(fattr, 0, sizeof(struct smb_fattr));
- fattr->f_mode = inode->i_mode;
- fattr->f_nlink = inode->i_nlink;
- fattr->f_ino = inode->i_ino;
- fattr->f_uid = inode->i_uid;
- fattr->f_gid = inode->i_gid;
- fattr->f_size = inode->i_size;
- fattr->f_mtime = inode->i_mtime;
- fattr->f_ctime = inode->i_ctime;
- fattr->f_atime = inode->i_atime;
- fattr->f_blocks = inode->i_blocks;
-
- fattr->attr = SMB_I(inode)->attr;
- /*
- * Keep the attributes in sync with the inode permissions.
- */
- if (fattr->f_mode & S_IWUSR)
- fattr->attr &= ~aRONLY;
- else
- fattr->attr |= aRONLY;
-}
-
-/*
- * Update the inode, possibly causing it to invalidate its pages if mtime/size
- * is different from last time.
- */
-void
-smb_set_inode_attr(struct inode *inode, struct smb_fattr *fattr)
-{
- struct smb_inode_info *ei = SMB_I(inode);
-
- /*
- * A size change should have a different mtime, or same mtime
- * but different size.
- */
- time_t last_time = inode->i_mtime.tv_sec;
- loff_t last_sz = inode->i_size;
-
- inode->i_mode = fattr->f_mode;
- inode->i_nlink = fattr->f_nlink;
- inode->i_uid = fattr->f_uid;
- inode->i_gid = fattr->f_gid;
- inode->i_ctime = fattr->f_ctime;
- inode->i_blocks = fattr->f_blocks;
- inode->i_size = fattr->f_size;
- inode->i_mtime = fattr->f_mtime;
- inode->i_atime = fattr->f_atime;
- ei->attr = fattr->attr;
-
- /*
- * Update the "last time refreshed" field for revalidation.
- */
- ei->oldmtime = jiffies;
-
- if (inode->i_mtime.tv_sec != last_time || inode->i_size != last_sz) {
- VERBOSE("%ld changed, old=%ld, new=%ld, oz=%ld, nz=%ld\n",
- inode->i_ino,
- (long) last_time, (long) inode->i_mtime.tv_sec,
- (long) last_sz, (long) inode->i_size);
-
- if (!S_ISDIR(inode->i_mode))
- invalidate_remote_inode(inode);
- }
-}
-
-/*
- * This is called if the connection has gone bad ...
- * try to kill off all the current inodes.
- */
-void
-smb_invalidate_inodes(struct smb_sb_info *server)
-{
- VERBOSE("\n");
- shrink_dcache_sb(SB_of(server));
-}
-
-/*
- * This is called to update the inode attributes after
- * we've made changes to a file or directory.
- */
-static int
-smb_refresh_inode(struct dentry *dentry)
-{
- struct inode *inode = dentry->d_inode;
- int error;
- struct smb_fattr fattr;
-
- error = smb_proc_getattr(dentry, &fattr);
- if (!error) {
- smb_renew_times(dentry);
- /*
- * Check whether the type part of the mode changed,
- * and don't update the attributes if it did.
- *
- * And don't dick with the root inode
- */
- if (inode->i_ino == 2)
- return error;
- if (S_ISLNK(inode->i_mode))
- return error; /* VFS will deal with it */
-
- if ((inode->i_mode & S_IFMT) == (fattr.f_mode & S_IFMT)) {
- smb_set_inode_attr(inode, &fattr);
- } else {
- /*
- * Big trouble! The inode has become a new object,
- * so any operations attempted on it are invalid.
- *
- * To limit damage, mark the inode as bad so that
- * subsequent lookup validations will fail.
- */
- PARANOIA("%s/%s changed mode, %07o to %07o\n",
- DENTRY_PATH(dentry),
- inode->i_mode, fattr.f_mode);
-
- fattr.f_mode = inode->i_mode; /* save mode */
- make_bad_inode(inode);
- inode->i_mode = fattr.f_mode; /* restore mode */
- /*
- * No need to worry about unhashing the dentry: the
- * lookup validation will see that the inode is bad.
- * But we do want to invalidate the caches ...
- */
- if (!S_ISDIR(inode->i_mode))
- invalidate_remote_inode(inode);
- else
- smb_invalid_dir_cache(inode);
- error = -EIO;
- }
- }
- return error;
-}
-
-/*
- * This is called when we want to check whether the inode
- * has changed on the server. If it has changed, we must
- * invalidate our local caches.
- */
-int
-smb_revalidate_inode(struct dentry *dentry)
-{
- struct smb_sb_info *s = server_from_dentry(dentry);
- struct inode *inode = dentry->d_inode;
- int error = 0;
-
- DEBUG1("smb_revalidate_inode\n");
- lock_kernel();
-
- /*
- * Check whether we've recently refreshed the inode.
- */
- if (time_before(jiffies, SMB_I(inode)->oldmtime + SMB_MAX_AGE(s))) {
- VERBOSE("up-to-date, ino=%ld, jiffies=%lu, oldtime=%lu\n",
- inode->i_ino, jiffies, SMB_I(inode)->oldmtime);
- goto out;
- }
-
- error = smb_refresh_inode(dentry);
-out:
- unlock_kernel();
- return error;
-}
-
-/*
- * This routine is called when i_nlink == 0 and i_count goes to 0.
- * All blocking cleanup operations need to go here to avoid races.
- */
-static void
-smb_evict_inode(struct inode *ino)
-{
- DEBUG1("ino=%ld\n", ino->i_ino);
- truncate_inode_pages(&ino->i_data, 0);
- end_writeback(ino);
- lock_kernel();
- if (smb_close(ino))
- PARANOIA("could not close inode %ld\n", ino->i_ino);
- unlock_kernel();
-}
-
-static struct option opts[] = {
- { "version", 0, 'v' },
- { "win95", SMB_MOUNT_WIN95, 1 },
- { "oldattr", SMB_MOUNT_OLDATTR, 1 },
- { "dirattr", SMB_MOUNT_DIRATTR, 1 },
- { "case", SMB_MOUNT_CASE, 1 },
- { "uid", 0, 'u' },
- { "gid", 0, 'g' },
- { "file_mode", 0, 'f' },
- { "dir_mode", 0, 'd' },
- { "iocharset", 0, 'i' },
- { "codepage", 0, 'c' },
- { "ttl", 0, 't' },
- { NULL, 0, 0}
-};
-
-static int
-parse_options(struct smb_mount_data_kernel *mnt, char *options)
-{
- int c;
- unsigned long flags;
- unsigned long value;
- char *optarg;
- char *optopt;
-
- flags = 0;
- while ( (c = smb_getopt("smbfs", &options, opts,
- &optopt, &optarg, &flags, &value)) > 0) {
-
- VERBOSE("'%s' -> '%s'\n", optopt, optarg ? optarg : "<none>");
- switch (c) {
- case 1:
- /* got a "flag" option */
- break;
- case 'v':
- if (value != SMB_MOUNT_VERSION) {
- printk ("smbfs: Bad mount version %ld, expected %d\n",
- value, SMB_MOUNT_VERSION);
- return 0;
- }
- mnt->version = value;
- break;
- case 'u':
- mnt->uid = value;
- flags |= SMB_MOUNT_UID;
- break;
- case 'g':
- mnt->gid = value;
- flags |= SMB_MOUNT_GID;
- break;
- case 'f':
- mnt->file_mode = (value & S_IRWXUGO) | S_IFREG;
- flags |= SMB_MOUNT_FMODE;
- break;
- case 'd':
- mnt->dir_mode = (value & S_IRWXUGO) | S_IFDIR;
- flags |= SMB_MOUNT_DMODE;
- break;
- case 'i':
- strlcpy(mnt->codepage.local_name, optarg,
- SMB_NLS_MAXNAMELEN);
- break;
- case 'c':
- strlcpy(mnt->codepage.remote_name, optarg,
- SMB_NLS_MAXNAMELEN);
- break;
- case 't':
- mnt->ttl = value;
- break;
- default:
- printk ("smbfs: Unrecognized mount option %s\n",
- optopt);
- return -1;
- }
- }
- mnt->flags = flags;
- return c;
-}
-
-/*
- * smb_show_options() is for displaying mount options in /proc/mounts.
- * It tries to avoid showing settings that were not changed from their
- * defaults.
- */
-static int
-smb_show_options(struct seq_file *s, struct vfsmount *m)
-{
- struct smb_mount_data_kernel *mnt = SMB_SB(m->mnt_sb)->mnt;
- int i;
-
- for (i = 0; opts[i].name != NULL; i++)
- if (mnt->flags & opts[i].flag)
- seq_printf(s, ",%s", opts[i].name);
-
- if (mnt->flags & SMB_MOUNT_UID)
- seq_printf(s, ",uid=%d", mnt->uid);
- if (mnt->flags & SMB_MOUNT_GID)
- seq_printf(s, ",gid=%d", mnt->gid);
- if (mnt->mounted_uid != 0)
- seq_printf(s, ",mounted_uid=%d", mnt->mounted_uid);
-
- /*
- * Defaults for file_mode and dir_mode are unknown to us; they
- * depend on the current umask of the user doing the mount.
- */
- if (mnt->flags & SMB_MOUNT_FMODE)
- seq_printf(s, ",file_mode=%04o", mnt->file_mode & S_IRWXUGO);
- if (mnt->flags & SMB_MOUNT_DMODE)
- seq_printf(s, ",dir_mode=%04o", mnt->dir_mode & S_IRWXUGO);
-
- if (strcmp(mnt->codepage.local_name, CONFIG_NLS_DEFAULT))
- seq_printf(s, ",iocharset=%s", mnt->codepage.local_name);
- if (strcmp(mnt->codepage.remote_name, SMB_NLS_REMOTE))
- seq_printf(s, ",codepage=%s", mnt->codepage.remote_name);
-
- if (mnt->ttl != SMB_TTL_DEFAULT)
- seq_printf(s, ",ttl=%d", mnt->ttl);
-
- return 0;
-}
-
-static void
-smb_unload_nls(struct smb_sb_info *server)
-{
- unload_nls(server->remote_nls);
- unload_nls(server->local_nls);
-}
-
-static void
-smb_put_super(struct super_block *sb)
-{
- struct smb_sb_info *server = SMB_SB(sb);
-
- lock_kernel();
-
- smb_lock_server(server);
- server->state = CONN_INVALID;
- smbiod_unregister_server(server);
-
- smb_close_socket(server);
-
- if (server->conn_pid)
- kill_pid(server->conn_pid, SIGTERM, 1);
-
- bdi_destroy(&server->bdi);
- kfree(server->ops);
- smb_unload_nls(server);
- sb->s_fs_info = NULL;
- smb_unlock_server(server);
- put_pid(server->conn_pid);
- kfree(server);
-
- unlock_kernel();
-}
-
-static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
-{
- struct smb_sb_info *server;
- struct smb_mount_data_kernel *mnt;
- struct smb_mount_data *oldmnt;
- struct inode *root_inode;
- struct smb_fattr root;
- int ver;
- void *mem;
- static int warn_count;
-
- lock_kernel();
-
- if (warn_count < 5) {
- warn_count++;
- printk(KERN_EMERG "smbfs is deprecated and will be removed"
- " from the 2.6.27 kernel. Please migrate to cifs\n");
- }
-
- if (!raw_data)
- goto out_no_data;
-
- oldmnt = (struct smb_mount_data *) raw_data;
- ver = oldmnt->version;
- if (ver != SMB_MOUNT_OLDVERSION && cpu_to_be32(ver) != SMB_MOUNT_ASCII)
- goto out_wrong_data;
-
- sb->s_flags |= MS_NODIRATIME;
- sb->s_blocksize = 1024; /* Eh... Is this correct? */
- sb->s_blocksize_bits = 10;
- sb->s_magic = SMB_SUPER_MAGIC;
- sb->s_op = &smb_sops;
- sb->s_time_gran = 100;
-
- server = kzalloc(sizeof(struct smb_sb_info), GFP_KERNEL);
- if (!server)
- goto out_no_server;
- sb->s_fs_info = server;
-
- if (bdi_setup_and_register(&server->bdi, "smbfs", BDI_CAP_MAP_COPY))
- goto out_bdi;
-
- sb->s_bdi = &server->bdi;
-
- server->super_block = sb;
- server->mnt = NULL;
- server->sock_file = NULL;
- init_waitqueue_head(&server->conn_wq);
- init_MUTEX(&server->sem);
- INIT_LIST_HEAD(&server->entry);
- INIT_LIST_HEAD(&server->xmitq);
- INIT_LIST_HEAD(&server->recvq);
- server->conn_error = 0;
- server->conn_pid = NULL;
- server->state = CONN_INVALID; /* no connection yet */
- server->generation = 0;
-
- /* Allocate the global temp buffer and some superblock helper structs */
- /* FIXME: move these to the smb_sb_info struct */
- VERBOSE("alloc chunk = %lu\n", sizeof(struct smb_ops) +
- sizeof(struct smb_mount_data_kernel));
- mem = kmalloc(sizeof(struct smb_ops) +
- sizeof(struct smb_mount_data_kernel), GFP_KERNEL);
- if (!mem)
- goto out_no_mem;
-
- server->ops = mem;
- smb_install_null_ops(server->ops);
- server->mnt = mem + sizeof(struct smb_ops);
-
- /* Setup NLS stuff */
- server->remote_nls = NULL;
- server->local_nls = NULL;
-
- mnt = server->mnt;
-
- memset(mnt, 0, sizeof(struct smb_mount_data_kernel));
- strlcpy(mnt->codepage.local_name, CONFIG_NLS_DEFAULT,
- SMB_NLS_MAXNAMELEN);
- strlcpy(mnt->codepage.remote_name, SMB_NLS_REMOTE,
- SMB_NLS_MAXNAMELEN);
-
- mnt->ttl = SMB_TTL_DEFAULT;
- if (ver == SMB_MOUNT_OLDVERSION) {
- mnt->version = oldmnt->version;
-
- SET_UID(mnt->uid, oldmnt->uid);
- SET_GID(mnt->gid, oldmnt->gid);
-
- mnt->file_mode = (oldmnt->file_mode & S_IRWXUGO) | S_IFREG;
- mnt->dir_mode = (oldmnt->dir_mode & S_IRWXUGO) | S_IFDIR;
-
- mnt->flags = (oldmnt->file_mode >> 9) | SMB_MOUNT_UID |
- SMB_MOUNT_GID | SMB_MOUNT_FMODE | SMB_MOUNT_DMODE;
- } else {
- mnt->file_mode = S_IRWXU | S_IRGRP | S_IXGRP |
- S_IROTH | S_IXOTH | S_IFREG;
- mnt->dir_mode = S_IRWXU | S_IRGRP | S_IXGRP |
- S_IROTH | S_IXOTH | S_IFDIR;
- if (parse_options(mnt, raw_data))
- goto out_bad_option;
- }
- mnt->mounted_uid = current_uid();
- smb_setcodepage(server, &mnt->codepage);
-
- /*
- * Display the enabled options
- * Note: smb_proc_getattr uses these in 2.4 (but was changed in 2.2)
- */
- if (mnt->flags & SMB_MOUNT_OLDATTR)
- printk("SMBFS: Using core getattr (Win 95 speedup)\n");
- else if (mnt->flags & SMB_MOUNT_DIRATTR)
- printk("SMBFS: Using dir ff getattr\n");
-
- if (smbiod_register_server(server) < 0) {
- printk(KERN_ERR "smbfs: failed to start smbiod\n");
- goto out_no_smbiod;
- }
-
- /*
- * Keep the super block locked while we get the root inode.
- */
- smb_init_root_dirent(server, &root, sb);
- root_inode = smb_iget(sb, &root);
- if (!root_inode)
- goto out_no_root;
-
- sb->s_root = d_alloc_root(root_inode);
- if (!sb->s_root)
- goto out_no_root;
-
- smb_new_dentry(sb->s_root);
-
- unlock_kernel();
- return 0;
-
-out_no_root:
- iput(root_inode);
-out_no_smbiod:
- smb_unload_nls(server);
-out_bad_option:
- kfree(mem);
-out_no_mem:
- bdi_destroy(&server->bdi);
-out_bdi:
- if (!server->mnt)
- printk(KERN_ERR "smb_fill_super: allocation failure\n");
- sb->s_fs_info = NULL;
- kfree(server);
- goto out_fail;
-out_wrong_data:
- printk(KERN_ERR "smbfs: mount_data version %d is not supported\n", ver);
- goto out_fail;
-out_no_data:
- printk(KERN_ERR "smb_fill_super: missing data argument\n");
-out_fail:
- unlock_kernel();
- return -EINVAL;
-out_no_server:
- printk(KERN_ERR "smb_fill_super: cannot allocate struct smb_sb_info\n");
- unlock_kernel();
- return -ENOMEM;
-}
-
-static int
-smb_statfs(struct dentry *dentry, struct kstatfs *buf)
-{
- int result;
-
- lock_kernel();
-
- result = smb_proc_dskattr(dentry, buf);
-
- unlock_kernel();
-
- buf->f_type = SMB_SUPER_MAGIC;
- buf->f_namelen = SMB_MAXPATHLEN;
- return result;
-}
-
-int smb_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
-{
- int err = smb_revalidate_inode(dentry);
- if (!err)
- generic_fillattr(dentry->d_inode, stat);
- return err;
-}
-
-int
-smb_notify_change(struct dentry *dentry, struct iattr *attr)
-{
- struct inode *inode = dentry->d_inode;
- struct smb_sb_info *server = server_from_dentry(dentry);
- unsigned int mask = (S_IFREG | S_IFDIR | S_IRWXUGO);
- int error, changed, refresh = 0;
- struct smb_fattr fattr;
-
- lock_kernel();
-
- error = smb_revalidate_inode(dentry);
- if (error)
- goto out;
-
- if ((error = inode_change_ok(inode, attr)) < 0)
- goto out;
-
- error = -EPERM;
- if ((attr->ia_valid & ATTR_UID) && (attr->ia_uid != server->mnt->uid))
- goto out;
-
- if ((attr->ia_valid & ATTR_GID) && (attr->ia_uid != server->mnt->gid))
- goto out;
-
- if ((attr->ia_valid & ATTR_MODE) && (attr->ia_mode & ~mask))
- goto out;
-
- if ((attr->ia_valid & ATTR_SIZE) != 0) {
- VERBOSE("changing %s/%s, old size=%ld, new size=%ld\n",
- DENTRY_PATH(dentry),
- (long) inode->i_size, (long) attr->ia_size);
-
- filemap_write_and_wait(inode->i_mapping);
-
- error = smb_open(dentry, O_WRONLY);
- if (error)
- goto out;
- error = server->ops->truncate(inode, attr->ia_size);
- if (error)
- goto out;
- truncate_setsize(inode, attr->ia_size);
- refresh = 1;
- }
-
- if (server->opt.capabilities & SMB_CAP_UNIX) {
- /* For now we don't want to set the size with setattr_unix */
- attr->ia_valid &= ~ATTR_SIZE;
- /* FIXME: only call if we actually want to set something? */
- error = smb_proc_setattr_unix(dentry, attr, 0, 0);
- if (!error)
- refresh = 1;
-
- goto out;
- }
-
- /*
- * Initialize the fattr and check for changed fields.
- * Note: CTIME under SMB is creation time rather than
- * change time, so we don't attempt to change it.
- */
- smb_get_inode_attr(inode, &fattr);
-
- changed = 0;
- if ((attr->ia_valid & ATTR_MTIME) != 0) {
- fattr.f_mtime = attr->ia_mtime;
- changed = 1;
- }
- if ((attr->ia_valid & ATTR_ATIME) != 0) {
- fattr.f_atime = attr->ia_atime;
- /* Earlier protocols don't have an access time */
- if (server->opt.protocol >= SMB_PROTOCOL_LANMAN2)
- changed = 1;
- }
- if (changed) {
- error = smb_proc_settime(dentry, &fattr);
- if (error)
- goto out;
- refresh = 1;
- }
-
- /*
- * Check for mode changes ... we're extremely limited in
- * what can be set for SMB servers: just the read-only bit.
- */
- if ((attr->ia_valid & ATTR_MODE) != 0) {
- VERBOSE("%s/%s mode change, old=%x, new=%x\n",
- DENTRY_PATH(dentry), fattr.f_mode, attr->ia_mode);
- changed = 0;
- if (attr->ia_mode & S_IWUSR) {
- if (fattr.attr & aRONLY) {
- fattr.attr &= ~aRONLY;
- changed = 1;
- }
- } else {
- if (!(fattr.attr & aRONLY)) {
- fattr.attr |= aRONLY;
- changed = 1;
- }
- }
- if (changed) {
- error = smb_proc_setattr(dentry, &fattr);
- if (error)
- goto out;
- refresh = 1;
- }
- }
- error = 0;
-
-out:
- if (refresh)
- smb_refresh_inode(dentry);
- unlock_kernel();
- return error;
-}
-
-static int smb_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
-{
- return get_sb_nodev(fs_type, flags, data, smb_fill_super, mnt);
-}
-
-static struct file_system_type smb_fs_type = {
- .owner = THIS_MODULE,
- .name = "smbfs",
- .get_sb = smb_get_sb,
- .kill_sb = kill_anon_super,
- .fs_flags = FS_BINARY_MOUNTDATA,
-};
-
-static int __init init_smb_fs(void)
-{
- int err;
- DEBUG1("registering ...\n");
-
- err = init_inodecache();
- if (err)
- goto out_inode;
- err = smb_init_request_cache();
- if (err)
- goto out_request;
- err = register_filesystem(&smb_fs_type);
- if (err)
- goto out;
- return 0;
-out:
- smb_destroy_request_cache();
-out_request:
- destroy_inodecache();
-out_inode:
- return err;
-}
-
-static void __exit exit_smb_fs(void)
-{
- DEBUG1("unregistering ...\n");
- unregister_filesystem(&smb_fs_type);
- smb_destroy_request_cache();
- destroy_inodecache();
-}
-
-module_init(init_smb_fs)
-module_exit(exit_smb_fs)
-MODULE_LICENSE("GPL");
diff --git a/fs/smbfs/ioctl.c b/fs/smbfs/ioctl.c
deleted file mode 100644
index 0721531..0000000
--- a/fs/smbfs/ioctl.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * ioctl.c
- *
- * Copyright (C) 1995, 1996 by Volker Lendecke
- * Copyright (C) 1997 by Volker Lendecke
- *
- * Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/ioctl.h>
-#include <linux/time.h>
-#include <linux/mm.h>
-#include <linux/highuid.h>
-#include <linux/smp_lock.h>
-#include <linux/net.h>
-
-#include <linux/smb_fs.h>
-#include <linux/smb_mount.h>
-
-#include <asm/uaccess.h>
-
-#include "proto.h"
-
-long
-smb_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
-{
- struct smb_sb_info *server = server_from_inode(filp->f_path.dentry->d_inode);
- struct smb_conn_opt opt;
- int result = -EINVAL;
-
- lock_kernel();
- switch (cmd) {
- uid16_t uid16;
- uid_t uid32;
- case SMB_IOC_GETMOUNTUID:
- SET_UID(uid16, server->mnt->mounted_uid);
- result = put_user(uid16, (uid16_t __user *) arg);
- break;
- case SMB_IOC_GETMOUNTUID32:
- SET_UID(uid32, server->mnt->mounted_uid);
- result = put_user(uid32, (uid_t __user *) arg);
- break;
-
- case SMB_IOC_NEWCONN:
- /* arg is smb_conn_opt, or NULL if no connection was made */
- if (!arg) {
- result = 0;
- smb_lock_server(server);
- server->state = CONN_RETRIED;
- printk(KERN_ERR "Connection attempt failed! [%d]\n",
- server->conn_error);
- smbiod_flush(server);
- smb_unlock_server(server);
- break;
- }
-
- result = -EFAULT;
- if (!copy_from_user(&opt, (void __user *)arg, sizeof(opt)))
- result = smb_newconn(server, &opt);
- break;
- default:
- break;
- }
- unlock_kernel();
-
- return result;
-}
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
deleted file mode 100644
index 3dcf638..0000000
--- a/fs/smbfs/proc.c
+++ /dev/null
@@ -1,3503 +0,0 @@
-/*
- * proc.c
- *
- * Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
- * Copyright (C) 1997 by Volker Lendecke
- *
- * Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/types.h>
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/dcache.h>
-#include <linux/nls.h>
-#include <linux/smp_lock.h>
-#include <linux/net.h>
-#include <linux/vfs.h>
-#include <linux/smb_fs.h>
-#include <linux/smbno.h>
-#include <linux/smb_mount.h>
-
-#include <net/sock.h>
-
-#include <asm/string.h>
-#include <asm/div64.h>
-
-#include "smb_debug.h"
-#include "proto.h"
-#include "request.h"
-
-
-/* Features. Undefine if they cause problems, this should perhaps be a
- config option. */
-#define SMBFS_POSIX_UNLINK 1
-
-/* Allow smb_retry to be interrupted. */
-#define SMB_RETRY_INTR
-
-#define SMB_VWV(packet) ((packet) + SMB_HEADER_LEN)
-#define SMB_CMD(packet) (*(packet+8))
-#define SMB_WCT(packet) (*(packet+SMB_HEADER_LEN - 1))
-
-#define SMB_DIRINFO_SIZE 43
-#define SMB_STATUS_SIZE 21
-
-#define SMB_ST_BLKSIZE (PAGE_SIZE)
-#define SMB_ST_BLKSHIFT (PAGE_SHIFT)
-
-static struct smb_ops smb_ops_core;
-static struct smb_ops smb_ops_os2;
-static struct smb_ops smb_ops_win95;
-static struct smb_ops smb_ops_winNT;
-static struct smb_ops smb_ops_unix;
-static struct smb_ops smb_ops_null;
-
-static void
-smb_init_dirent(struct smb_sb_info *server, struct smb_fattr *fattr);
-static void
-smb_finish_dirent(struct smb_sb_info *server, struct smb_fattr *fattr);
-static int
-smb_proc_getattr_core(struct smb_sb_info *server, struct dentry *dir,
- struct smb_fattr *fattr);
-static int
-smb_proc_getattr_ff(struct smb_sb_info *server, struct dentry *dentry,
- struct smb_fattr *fattr);
-static int
-smb_proc_setattr_core(struct smb_sb_info *server, struct dentry *dentry,
- u16 attr);
-static int
-smb_proc_setattr_ext(struct smb_sb_info *server,
- struct inode *inode, struct smb_fattr *fattr);
-static int
-smb_proc_query_cifsunix(struct smb_sb_info *server);
-static void
-install_ops(struct smb_ops *dst, struct smb_ops *src);
-
-
-static void
-str_upper(char *name, int len)
-{
- while (len--)
- {
- if (*name >= 'a' && *name <= 'z')
- *name -= ('a' - 'A');
- name++;
- }
-}
-
-#if 0
-static void
-str_lower(char *name, int len)
-{
- while (len--)
- {
- if (*name >= 'A' && *name <= 'Z')
- *name += ('a' - 'A');
- name++;
- }
-}
-#endif
-
-/* reverse a string inline. This is used by the dircache walking routines */
-static void reverse_string(char *buf, int len)
-{
- char c;
- char *end = buf+len-1;
-
- while(buf < end) {
- c = *buf;
- *(buf++) = *end;
- *(end--) = c;
- }
-}
-
-/* no conversion, just a wrapper for memcpy. */
-static int convert_memcpy(unsigned char *output, int olen,
- const unsigned char *input, int ilen,
- struct nls_table *nls_from,
- struct nls_table *nls_to)
-{
- if (olen < ilen)
- return -ENAMETOOLONG;
- memcpy(output, input, ilen);
- return ilen;
-}
-
-static inline int write_char(unsigned char ch, char *output, int olen)
-{
- if (olen < 4)
- return -ENAMETOOLONG;
- sprintf(output, ":x%02x", ch);
- return 4;
-}
-
-static inline int write_unichar(wchar_t ch, char *output, int olen)
-{
- if (olen < 5)
- return -ENAMETOOLONG;
- sprintf(output, ":%04x", ch);
- return 5;
-}
-
-/* convert from one "codepage" to another (possibly being utf8). */
-static int convert_cp(unsigned char *output, int olen,
- const unsigned char *input, int ilen,
- struct nls_table *nls_from,
- struct nls_table *nls_to)
-{
- int len = 0;
- int n;
- wchar_t ch;
-
- while (ilen > 0) {
- /* convert by changing to unicode and back to the new cp */
- n = nls_from->char2uni(input, ilen, &ch);
- if (n == -EINVAL) {
- ilen--;
- n = write_char(*input++, output, olen);
- if (n < 0)
- goto fail;
- output += n;
- olen -= n;
- len += n;
- continue;
- } else if (n < 0)
- goto fail;
- input += n;
- ilen -= n;
-
- n = nls_to->uni2char(ch, output, olen);
- if (n == -EINVAL)
- n = write_unichar(ch, output, olen);
- if (n < 0)
- goto fail;
- output += n;
- olen -= n;
-
- len += n;
- }
- return len;
-fail:
- return n;
-}
-
-/* ----------------------------------------------------------- */
-
-/*
- * nls_unicode
- *
- * This encodes/decodes little endian unicode format
- */
-
-static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
-{
- if (boundlen < 2)
- return -EINVAL;
- *out++ = uni & 0xff;
- *out++ = uni >> 8;
- return 2;
-}
-
-static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni)
-{
- if (boundlen < 2)
- return -EINVAL;
- *uni = (rawstring[1] << 8) | rawstring[0];
- return 2;
-}
-
-static struct nls_table unicode_table = {
- .charset = "unicode",
- .uni2char = uni2char,
- .char2uni = char2uni,
-};
-
-/* ----------------------------------------------------------- */
-
-static int setcodepage(struct nls_table **p, char *name)
-{
- struct nls_table *nls;
-
- if (!name || !*name) {
- nls = NULL;
- } else if ( (nls = load_nls(name)) == NULL) {
- printk (KERN_ERR "smbfs: failed to load nls '%s'\n", name);
- return -EINVAL;
- }
-
- /* if already set, unload the previous one. */
- if (*p && *p != &unicode_table)
- unload_nls(*p);
- *p = nls;
-
- return 0;
-}
-
-/* Handles all changes to codepage settings. */
-int smb_setcodepage(struct smb_sb_info *server, struct smb_nls_codepage *cp)
-{
- int n = 0;
-
- smb_lock_server(server);
-
- /* Don't load any nls_* at all, if no remote is requested */
- if (!*cp->remote_name)
- goto out;
-
- /* local */
- n = setcodepage(&server->local_nls, cp->local_name);
- if (n != 0)
- goto out;
-
- /* remote */
- if (!strcmp(cp->remote_name, "unicode")) {
- server->remote_nls = &unicode_table;
- } else {
- n = setcodepage(&server->remote_nls, cp->remote_name);
- if (n != 0)
- setcodepage(&server->local_nls, NULL);
- }
-
-out:
- if (server->local_nls != NULL && server->remote_nls != NULL)
- server->ops->convert = convert_cp;
- else
- server->ops->convert = convert_memcpy;
-
- smb_unlock_server(server);
- return n;
-}
-
-
-/*****************************************************************************/
-/* */
-/* Encoding/Decoding section */
-/* */
-/*****************************************************************************/
-
-static __u8 *
-smb_encode_smb_length(__u8 * p, __u32 len)
-{
- *p = 0;
- *(p+1) = 0;
- *(p+2) = (len & 0xFF00) >> 8;
- *(p+3) = (len & 0xFF);
- if (len > 0xFFFF)
- {
- *(p+1) = 1;
- }
- return p + 4;
-}
-
-/*
- * smb_build_path: build the path to entry and name storing it in buf.
- * The path returned will have the trailing '\0'.
- */
-static int smb_build_path(struct smb_sb_info *server, unsigned char *buf,
- int maxlen,
- struct dentry *entry, struct qstr *name)
-{
- unsigned char *path = buf;
- int len;
- int unicode = (server->mnt->flags & SMB_MOUNT_UNICODE) != 0;
-
- if (maxlen < (2<<unicode))
- return -ENAMETOOLONG;
-
- if (maxlen > SMB_MAXPATHLEN + 1)
- maxlen = SMB_MAXPATHLEN + 1;
-
- if (entry == NULL)
- goto test_name_and_out;
-
- /*
- * If IS_ROOT, we have to do no walking at all.
- */
- if (IS_ROOT(entry) && !name) {
- *path++ = '\\';
- if (unicode) *path++ = '\0';
- *path++ = '\0';
- if (unicode) *path++ = '\0';
- return path-buf;
- }
-
- /*
- * Build the path string walking the tree backward from end to ROOT
- * and store it in reversed order [see reverse_string()]
- */
- dget(entry);
- while (!IS_ROOT(entry)) {
- struct dentry *parent;
-
- if (maxlen < (3<<unicode)) {
- dput(entry);
- return -ENAMETOOLONG;
- }
-
- spin_lock(&entry->d_lock);
- len = server->ops->convert(path, maxlen-2,
- entry->d_name.name, entry->d_name.len,
- server->local_nls, server->remote_nls);
- if (len < 0) {
- spin_unlock(&entry->d_lock);
- dput(entry);
- return len;
- }
- reverse_string(path, len);
- path += len;
- if (unicode) {
- /* Note: reverse order */
- *path++ = '\0';
- maxlen--;
- }
- *path++ = '\\';
- maxlen -= len+1;
- spin_unlock(&entry->d_lock);
-
- parent = dget_parent(entry);
- dput(entry);
- entry = parent;
- }
- dput(entry);
- reverse_string(buf, path-buf);
-
- /* maxlen has space for at least one char */
-test_name_and_out:
- if (name) {
- if (maxlen < (3<<unicode))
- return -ENAMETOOLONG;
- *path++ = '\\';
- if (unicode) {
- *path++ = '\0';
- maxlen--;
- }
- len = server->ops->convert(path, maxlen-2,
- name->name, name->len,
- server->local_nls, server->remote_nls);
- if (len < 0)
- return len;
- path += len;
- maxlen -= len+1;
- }
- /* maxlen has space for at least one char */
- *path++ = '\0';
- if (unicode) *path++ = '\0';
- return path-buf;
-}
-
-static int smb_encode_path(struct smb_sb_info *server, char *buf, int maxlen,
- struct dentry *dir, struct qstr *name)
-{
- int result;
-
- result = smb_build_path(server, buf, maxlen, dir, name);
- if (result < 0)
- goto out;
- if (server->opt.protocol <= SMB_PROTOCOL_COREPLUS)
- str_upper(buf, result);
-out:
- return result;
-}
-
-/* encode_path for non-trans2 request SMBs */
-static int smb_simple_encode_path(struct smb_request *req, char **p,
- struct dentry * entry, struct qstr * name)
-{
- struct smb_sb_info *server = req->rq_server;
- char *s = *p;
- int res;
- int maxlen = ((char *)req->rq_buffer + req->rq_bufsize) - s;
- int unicode = (server->mnt->flags & SMB_MOUNT_UNICODE);
-
- if (!maxlen)
- return -ENAMETOOLONG;
- *s++ = 4; /* ASCII data format */
-
- /*
- * SMB Unicode strings must be 16bit aligned relative the start of the
- * packet. If they are not they must be padded with 0.
- */
- if (unicode) {
- int align = s - (char *)req->rq_buffer;
- if (!(align & 1)) {
- *s++ = '\0';
- maxlen--;
- }
- }
-
- res = smb_encode_path(server, s, maxlen-1, entry, name);
- if (res < 0)
- return res;
- *p = s + res;
- return 0;
-}
-
-/* The following are taken directly from msdos-fs */
-
-/* Linear day numbers of the respective 1sts in non-leap years. */
-
-static int day_n[] =
-{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0, 0};
- /* JanFebMarApr May Jun Jul Aug Sep Oct Nov Dec */
-
-
-static time_t
-utc2local(struct smb_sb_info *server, time_t time)
-{
- return time - server->opt.serverzone*60;
-}
-
-static time_t
-local2utc(struct smb_sb_info *server, time_t time)
-{
- return time + server->opt.serverzone*60;
-}
-
-/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */
-
-static time_t
-date_dos2unix(struct smb_sb_info *server, __u16 date, __u16 time)
-{
- int month, year;
- time_t secs;
-
- /* first subtract and mask after that... Otherwise, if
- date == 0, bad things happen */
- month = ((date >> 5) - 1) & 15;
- year = date >> 9;
- secs = (time & 31) * 2 + 60 * ((time >> 5) & 63) + (time >> 11) * 3600 + 86400 *
- ((date & 31) - 1 + day_n[month] + (year / 4) + year * 365 - ((year & 3) == 0 &&
- month < 2 ? 1 : 0) + 3653);
- /* days since 1.1.70 plus 80's leap day */
- return local2utc(server, secs);
-}
-
-
-/* Convert linear UNIX date to a MS-DOS time/date pair. */
-
-static void
-date_unix2dos(struct smb_sb_info *server,
- int unix_date, __u16 *date, __u16 *time)
-{
- int day, year, nl_day, month;
-
- unix_date = utc2local(server, unix_date);
- if (unix_date < 315532800)
- unix_date = 315532800;
-
- *time = (unix_date % 60) / 2 +
- (((unix_date / 60) % 60) << 5) +
- (((unix_date / 3600) % 24) << 11);
-
- day = unix_date / 86400 - 3652;
- year = day / 365;
- if ((year + 3) / 4 + 365 * year > day)
- year--;
- day -= (year + 3) / 4 + 365 * year;
- if (day == 59 && !(year & 3)) {
- nl_day = day;
- month = 2;
- } else {
- nl_day = (year & 3) || day <= 59 ? day : day - 1;
- for (month = 1; month < 12; month++)
- if (day_n[month] > nl_day)
- break;
- }
- *date = nl_day - day_n[month - 1] + 1 + (month << 5) + (year << 9);
-}
-
-/* The following are taken from fs/ntfs/util.c */
-
-#define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000)
-
-/*
- * Convert the NT UTC (based 1601-01-01, in hundred nanosecond units)
- * into Unix UTC (based 1970-01-01, in seconds).
- */
-static struct timespec
-smb_ntutc2unixutc(u64 ntutc)
-{
- struct timespec ts;
- /* FIXME: what about the timezone difference? */
- /* Subtract the NTFS time offset, then convert to 1s intervals. */
- u64 t = ntutc - NTFS_TIME_OFFSET;
- ts.tv_nsec = do_div(t, 10000000) * 100;
- ts.tv_sec = t;
- return ts;
-}
-
-/* Convert the Unix UTC into NT time */
-static u64
-smb_unixutc2ntutc(struct timespec ts)
-{
- /* Note: timezone conversion is probably wrong. */
- /* return ((u64)utc2local(server, t)) * 10000000 + NTFS_TIME_OFFSET; */
- return ((u64)ts.tv_sec) * 10000000 + ts.tv_nsec/100 + NTFS_TIME_OFFSET;
-}
-
-#define MAX_FILE_MODE 6
-static mode_t file_mode[] = {
- S_IFREG, S_IFDIR, S_IFLNK, S_IFCHR, S_IFBLK, S_IFIFO, S_IFSOCK
-};
-
-static int smb_filetype_to_mode(u32 filetype)
-{
- if (filetype > MAX_FILE_MODE) {
- PARANOIA("Filetype out of range: %d\n", filetype);
- return S_IFREG;
- }
- return file_mode[filetype];
-}
-
-static u32 smb_filetype_from_mode(int mode)
-{
- if (S_ISREG(mode))
- return UNIX_TYPE_FILE;
- if (S_ISDIR(mode))
- return UNIX_TYPE_DIR;
- if (S_ISLNK(mode))
- return UNIX_TYPE_SYMLINK;
- if (S_ISCHR(mode))
- return UNIX_TYPE_CHARDEV;
- if (S_ISBLK(mode))
- return UNIX_TYPE_BLKDEV;
- if (S_ISFIFO(mode))
- return UNIX_TYPE_FIFO;
- if (S_ISSOCK(mode))
- return UNIX_TYPE_SOCKET;
- return UNIX_TYPE_UNKNOWN;
-}
-
-
-/*****************************************************************************/
-/* */
-/* Support section. */
-/* */
-/*****************************************************************************/
-
-__u32
-smb_len(__u8 * p)
-{
- return ((*(p+1) & 0x1) << 16L) | (*(p+2) << 8L) | *(p+3);
-}
-
-static __u16
-smb_bcc(__u8 * packet)
-{
- int pos = SMB_HEADER_LEN + SMB_WCT(packet) * sizeof(__u16);
- return WVAL(packet, pos);
-}
-
-/* smb_valid_packet: We check if packet fulfills the basic
- requirements of a smb packet */
-
-static int
-smb_valid_packet(__u8 * packet)
-{
- return (packet[4] == 0xff
- && packet[5] == 'S'
- && packet[6] == 'M'
- && packet[7] == 'B'
- && (smb_len(packet) + 4 == SMB_HEADER_LEN
- + SMB_WCT(packet) * 2 + smb_bcc(packet)));
-}
-
-/* smb_verify: We check if we got the answer we expected, and if we
- got enough data. If bcc == -1, we don't care. */
-
-static int
-smb_verify(__u8 * packet, int command, int wct, int bcc)
-{
- if (SMB_CMD(packet) != command)
- goto bad_command;
- if (SMB_WCT(packet) < wct)
- goto bad_wct;
- if (bcc != -1 && smb_bcc(packet) < bcc)
- goto bad_bcc;
- return 0;
-
-bad_command:
- printk(KERN_ERR "smb_verify: command=%x, SMB_CMD=%x??\n",
- command, SMB_CMD(packet));
- goto fail;
-bad_wct:
- printk(KERN_ERR "smb_verify: command=%x, wct=%d, SMB_WCT=%d??\n",
- command, wct, SMB_WCT(packet));
- goto fail;
-bad_bcc:
- printk(KERN_ERR "smb_verify: command=%x, bcc=%d, SMB_BCC=%d??\n",
- command, bcc, smb_bcc(packet));
-fail:
- return -EIO;
-}
-
-/*
- * Returns the maximum read or write size for the "payload". Making all of the
- * packet fit within the negotiated max_xmit size.
- *
- * N.B. Since this value is usually computed before locking the server,
- * the server's packet size must never be decreased!
- */
-static inline int
-smb_get_xmitsize(struct smb_sb_info *server, int overhead)
-{
- return server->opt.max_xmit - overhead;
-}
-
-/*
- * Calculate the maximum read size
- */
-int
-smb_get_rsize(struct smb_sb_info *server)
-{
- /* readX has 12 parameters, read has 5 */
- int overhead = SMB_HEADER_LEN + 12 * sizeof(__u16) + 2 + 1 + 2;
- int size = smb_get_xmitsize(server, overhead);
-
- VERBOSE("xmit=%d, size=%d\n", server->opt.max_xmit, size);
-
- return size;
-}
-
-/*
- * Calculate the maximum write size
- */
-int
-smb_get_wsize(struct smb_sb_info *server)
-{
- /* writeX has 14 parameters, write has 5 */
- int overhead = SMB_HEADER_LEN + 14 * sizeof(__u16) + 2 + 1 + 2;
- int size = smb_get_xmitsize(server, overhead);
-
- VERBOSE("xmit=%d, size=%d\n", server->opt.max_xmit, size);
-
- return size;
-}
-
-/*
- * Convert SMB error codes to -E... errno values.
- */
-int
-smb_errno(struct smb_request *req)
-{
- int errcls = req->rq_rcls;
- int error = req->rq_err;
- char *class = "Unknown";
-
- VERBOSE("errcls %d code %d from command 0x%x\n",
- errcls, error, SMB_CMD(req->rq_header));
-
- if (errcls == ERRDOS) {
- switch (error) {
- case ERRbadfunc:
- return -EINVAL;
- case ERRbadfile:
- case ERRbadpath:
- return -ENOENT;
- case ERRnofids:
- return -EMFILE;
- case ERRnoaccess:
- return -EACCES;
- case ERRbadfid:
- return -EBADF;
- case ERRbadmcb:
- return -EREMOTEIO;
- case ERRnomem:
- return -ENOMEM;
- case ERRbadmem:
- return -EFAULT;
- case ERRbadenv:
- case ERRbadformat:
- return -EREMOTEIO;
- case ERRbadaccess:
- return -EACCES;
- case ERRbaddata:
- return -E2BIG;
- case ERRbaddrive:
- return -ENXIO;
- case ERRremcd:
- return -EREMOTEIO;
- case ERRdiffdevice:
- return -EXDEV;
- case ERRnofiles:
- return -ENOENT;
- case ERRbadshare:
- return -ETXTBSY;
- case ERRlock:
- return -EDEADLK;
- case ERRfilexists:
- return -EEXIST;
- case ERROR_INVALID_PARAMETER:
- return -EINVAL;
- case ERROR_DISK_FULL:
- return -ENOSPC;
- case ERROR_INVALID_NAME:
- return -ENOENT;
- case ERROR_DIR_NOT_EMPTY:
- return -ENOTEMPTY;
- case ERROR_NOT_LOCKED:
- return -ENOLCK;
- case ERROR_ALREADY_EXISTS:
- return -EEXIST;
- default:
- class = "ERRDOS";
- goto err_unknown;
- }
- } else if (errcls == ERRSRV) {
- switch (error) {
- /* N.B. This is wrong ... EIO ? */
- case ERRerror:
- return -ENFILE;
- case ERRbadpw:
- return -EINVAL;
- case ERRbadtype:
- case ERRtimeout:
- return -EIO;
- case ERRaccess:
- return -EACCES;
- /*
- * This is a fatal error, as it means the "tree ID"
- * for this connection is no longer valid. We map
- * to a special error code and get a new connection.
- */
- case ERRinvnid:
- return -EBADSLT;
- default:
- class = "ERRSRV";
- goto err_unknown;
- }
- } else if (errcls == ERRHRD) {
- switch (error) {
- case ERRnowrite:
- return -EROFS;
- case ERRbadunit:
- return -ENODEV;
- case ERRnotready:
- return -EUCLEAN;
- case ERRbadcmd:
- case ERRdata:
- return -EIO;
- case ERRbadreq:
- return -ERANGE;
- case ERRbadshare:
- return -ETXTBSY;
- case ERRlock:
- return -EDEADLK;
- case ERRdiskfull:
- return -ENOSPC;
- default:
- class = "ERRHRD";
- goto err_unknown;
- }
- } else if (errcls == ERRCMD) {
- class = "ERRCMD";
- } else if (errcls == SUCCESS) {
- return 0; /* This is the only valid 0 return */
- }
-
-err_unknown:
- printk(KERN_ERR "smb_errno: class %s, code %d from command 0x%x\n",
- class, error, SMB_CMD(req->rq_header));
- return -EIO;
-}
-
-/* smb_request_ok: We expect the server to be locked. Then we do the
- request and check the answer completely. When smb_request_ok
- returns 0, you can be quite sure that everything went well. When
- the answer is <=0, the returned number is a valid unix errno. */
-
-static int
-smb_request_ok(struct smb_request *req, int command, int wct, int bcc)
-{
- int result;
-
- req->rq_resp_wct = wct;
- req->rq_resp_bcc = bcc;
-
- result = smb_add_request(req);
- if (result != 0) {
- DEBUG1("smb_request failed\n");
- goto out;
- }
-
- if (smb_valid_packet(req->rq_header) != 0) {
- PARANOIA("invalid packet!\n");
- goto out;
- }
-
- result = smb_verify(req->rq_header, command, wct, bcc);
-
-out:
- return result;
-}
-
-/*
- * This implements the NEWCONN ioctl. It installs the server pid,
- * sets server->state to CONN_VALID, and wakes up the waiting process.
- */
-int
-smb_newconn(struct smb_sb_info *server, struct smb_conn_opt *opt)
-{
- struct file *filp;
- struct sock *sk;
- int error;
-
- VERBOSE("fd=%d, pid=%d\n", opt->fd, current->pid);
-
- smb_lock_server(server);
-
- /*
- * Make sure we don't already have a valid connection ...
- */
- error = -EINVAL;
- if (server->state == CONN_VALID)
- goto out;
-
- error = -EACCES;
- if (current_uid() != server->mnt->mounted_uid &&
- !capable(CAP_SYS_ADMIN))
- goto out;
-
- error = -EBADF;
- filp = fget(opt->fd);
- if (!filp)
- goto out;
- if (!smb_valid_socket(filp->f_path.dentry->d_inode))
- goto out_putf;
-
- server->sock_file = filp;
- server->conn_pid = get_pid(task_pid(current));
- server->opt = *opt;
- server->generation += 1;
- server->state = CONN_VALID;
- error = 0;
-
- if (server->conn_error) {
- /*
- * conn_error is the returncode we originally decided to
- * drop the old connection on. This message should be positive
- * and not make people ask questions on why smbfs is printing
- * error messages ...
- */
- printk(KERN_INFO "SMB connection re-established (%d)\n",
- server->conn_error);
- server->conn_error = 0;
- }
-
- /*
- * Store the server in sock user_data (Only used by sunrpc)
- */
- sk = SOCKET_I(filp->f_path.dentry->d_inode)->sk;
- sk->sk_user_data = server;
-
- /* chain into the data_ready callback */
- server->data_ready = xchg(&sk->sk_data_ready, smb_data_ready);
-
- /* check if we have an old smbmount that uses seconds for the
- serverzone */
- if (server->opt.serverzone > 12*60 || server->opt.serverzone < -12*60)
- server->opt.serverzone /= 60;
-
- /* now that we have an established connection we can detect the server
- type and enable bug workarounds */
- if (server->opt.protocol < SMB_PROTOCOL_LANMAN2)
- install_ops(server->ops, &smb_ops_core);
- else if (server->opt.protocol == SMB_PROTOCOL_LANMAN2)
- install_ops(server->ops, &smb_ops_os2);
- else if (server->opt.protocol == SMB_PROTOCOL_NT1 &&
- (server->opt.max_xmit < 0x1000) &&
- !(server->opt.capabilities & SMB_CAP_NT_SMBS)) {
- /* FIXME: can we kill the WIN95 flag now? */
- server->mnt->flags |= SMB_MOUNT_WIN95;
- VERBOSE("detected WIN95 server\n");
- install_ops(server->ops, &smb_ops_win95);
- } else {
- /*
- * Samba has max_xmit 65535
- * NT4spX has max_xmit 4536 (or something like that)
- * win2k has ...
- */
- VERBOSE("detected NT1 (Samba, NT4/5) server\n");
- install_ops(server->ops, &smb_ops_winNT);
- }
-
- /* FIXME: the win9x code wants to modify these ... (seek/trunc bug) */
- if (server->mnt->flags & SMB_MOUNT_OLDATTR) {
- server->ops->getattr = smb_proc_getattr_core;
- } else if (server->mnt->flags & SMB_MOUNT_DIRATTR) {
- server->ops->getattr = smb_proc_getattr_ff;
- }
-
- /* Decode server capabilities */
- if (server->opt.capabilities & SMB_CAP_LARGE_FILES) {
- /* Should be ok to set this now, as no one can access the
- mount until the connection has been established. */
- SB_of(server)->s_maxbytes = ~0ULL >> 1;
- VERBOSE("LFS enabled\n");
- }
- if (server->opt.capabilities & SMB_CAP_UNICODE) {
- server->mnt->flags |= SMB_MOUNT_UNICODE;
- VERBOSE("Unicode enabled\n");
- } else {
- server->mnt->flags &= ~SMB_MOUNT_UNICODE;
- }
-#if 0
- /* flags we may test for other patches ... */
- if (server->opt.capabilities & SMB_CAP_LARGE_READX) {
- VERBOSE("Large reads enabled\n");
- }
- if (server->opt.capabilities & SMB_CAP_LARGE_WRITEX) {
- VERBOSE("Large writes enabled\n");
- }
-#endif
- if (server->opt.capabilities & SMB_CAP_UNIX) {
- struct inode *inode;
- VERBOSE("Using UNIX CIFS extensions\n");
- install_ops(server->ops, &smb_ops_unix);
- inode = SB_of(server)->s_root->d_inode;
- if (inode)
- inode->i_op = &smb_dir_inode_operations_unix;
- }
-
- VERBOSE("protocol=%d, max_xmit=%d, pid=%d capabilities=0x%x\n",
- server->opt.protocol, server->opt.max_xmit,
- pid_nr(server->conn_pid), server->opt.capabilities);
-
- /* FIXME: this really should be done by smbmount. */
- if (server->opt.max_xmit > SMB_MAX_PACKET_SIZE) {
- server->opt.max_xmit = SMB_MAX_PACKET_SIZE;
- }
-
- smb_unlock_server(server);
- smbiod_wake_up();
- if (server->opt.capabilities & SMB_CAP_UNIX)
- smb_proc_query_cifsunix(server);
-
- server->conn_complete++;
- wake_up_interruptible_all(&server->conn_wq);
- return error;
-
-out:
- smb_unlock_server(server);
- smbiod_wake_up();
- return error;
-
-out_putf:
- fput(filp);
- goto out;
-}
-
-/* smb_setup_header: We completely set up the packet. You only have to
- insert the command-specific fields */
-
-__u8 *
-smb_setup_header(struct smb_request *req, __u8 command, __u16 wct, __u16 bcc)
-{
- __u32 xmit_len = SMB_HEADER_LEN + wct * sizeof(__u16) + bcc + 2;
- __u8 *p = req->rq_header;
- struct smb_sb_info *server = req->rq_server;
-
- p = smb_encode_smb_length(p, xmit_len - 4);
-
- *p++ = 0xff;
- *p++ = 'S';
- *p++ = 'M';
- *p++ = 'B';
- *p++ = command;
-
- memset(p, '\0', 19);
- p += 19;
- p += 8;
-
- if (server->opt.protocol > SMB_PROTOCOL_CORE) {
- int flags = SMB_FLAGS_CASELESS_PATHNAMES;
- int flags2 = SMB_FLAGS2_LONG_PATH_COMPONENTS |
- SMB_FLAGS2_EXTENDED_ATTRIBUTES; /* EA? not really ... */
-
- *(req->rq_header + smb_flg) = flags;
- if (server->mnt->flags & SMB_MOUNT_UNICODE)
- flags2 |= SMB_FLAGS2_UNICODE_STRINGS;
- WSET(req->rq_header, smb_flg2, flags2);
- }
- *p++ = wct; /* wct */
- p += 2 * wct;
- WSET(p, 0, bcc);
-
- /* Include the header in the data to send */
- req->rq_iovlen = 1;
- req->rq_iov[0].iov_base = req->rq_header;
- req->rq_iov[0].iov_len = xmit_len - bcc;
-
- return req->rq_buffer;
-}
-
-static void
-smb_setup_bcc(struct smb_request *req, __u8 *p)
-{
- u16 bcc = p - req->rq_buffer;
- u8 *pbcc = req->rq_header + SMB_HEADER_LEN + 2*SMB_WCT(req->rq_header);
-
- WSET(pbcc, 0, bcc);
-
- smb_encode_smb_length(req->rq_header, SMB_HEADER_LEN +
- 2*SMB_WCT(req->rq_header) - 2 + bcc);
-
- /* Include the "bytes" in the data to send */
- req->rq_iovlen = 2;
- req->rq_iov[1].iov_base = req->rq_buffer;
- req->rq_iov[1].iov_len = bcc;
-}
-
-static int
-smb_proc_seek(struct smb_sb_info *server, __u16 fileid,
- __u16 mode, off_t offset)
-{
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 0)))
- goto out;
-
- smb_setup_header(req, SMBlseek, 4, 0);
- WSET(req->rq_header, smb_vwv0, fileid);
- WSET(req->rq_header, smb_vwv1, mode);
- DSET(req->rq_header, smb_vwv2, offset);
- req->rq_flags |= SMB_REQ_NORETRY;
-
- result = smb_request_ok(req, SMBlseek, 2, 0);
- if (result < 0) {
- result = 0;
- goto out_free;
- }
-
- result = DVAL(req->rq_header, smb_vwv0);
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-static int
-smb_proc_open(struct smb_sb_info *server, struct dentry *dentry, int wish)
-{
- struct inode *ino = dentry->d_inode;
- struct smb_inode_info *ei = SMB_I(ino);
- int mode, read_write = 0x42, read_only = 0x40;
- int res;
- char *p;
- struct smb_request *req;
-
- /*
- * Attempt to open r/w, unless there are no write privileges.
- */
- mode = read_write;
- if (!(ino->i_mode & (S_IWUSR | S_IWGRP | S_IWOTH)))
- mode = read_only;
-#if 0
- /* FIXME: why is this code not in? below we fix it so that a caller
- wanting RO doesn't get RW. smb_revalidate_inode does some
- optimization based on access mode. tail -f needs it to be correct.
-
- We must open rw since we don't do the open if called a second time
- with different 'wish'. Is that not supported by smb servers? */
- if (!(wish & (O_WRONLY | O_RDWR)))
- mode = read_only;
-#endif
-
- res = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- retry:
- p = smb_setup_header(req, SMBopen, 2, 0);
- WSET(req->rq_header, smb_vwv0, mode);
- WSET(req->rq_header, smb_vwv1, aSYSTEM | aHIDDEN | aDIR);
- res = smb_simple_encode_path(req, &p, dentry, NULL);
- if (res < 0)
- goto out_free;
- smb_setup_bcc(req, p);
-
- res = smb_request_ok(req, SMBopen, 7, 0);
- if (res != 0) {
- if (mode == read_write &&
- (res == -EACCES || res == -ETXTBSY || res == -EROFS))
- {
- VERBOSE("%s/%s R/W failed, error=%d, retrying R/O\n",
- DENTRY_PATH(dentry), res);
- mode = read_only;
- req->rq_flags = 0;
- goto retry;
- }
- goto out_free;
- }
- /* We should now have data in vwv[0..6]. */
-
- ei->fileid = WVAL(req->rq_header, smb_vwv0);
- ei->attr = WVAL(req->rq_header, smb_vwv1);
- /* smb_vwv2 has mtime */
- /* smb_vwv4 has size */
- ei->access = (WVAL(req->rq_header, smb_vwv6) & SMB_ACCMASK);
- ei->open = server->generation;
-
-out_free:
- smb_rput(req);
-out:
- return res;
-}
-
-/*
- * Make sure the file is open, and check that the access
- * is compatible with the desired access.
- */
-int
-smb_open(struct dentry *dentry, int wish)
-{
- struct inode *inode = dentry->d_inode;
- int result;
- __u16 access;
-
- result = -ENOENT;
- if (!inode) {
- printk(KERN_ERR "smb_open: no inode for dentry %s/%s\n",
- DENTRY_PATH(dentry));
- goto out;
- }
-
- if (!smb_is_open(inode)) {
- struct smb_sb_info *server = server_from_inode(inode);
- result = 0;
- if (!smb_is_open(inode))
- result = smb_proc_open(server, dentry, wish);
- if (result)
- goto out;
- /*
- * A successful open means the path is still valid ...
- */
- smb_renew_times(dentry);
- }
-
- /*
- * Check whether the access is compatible with the desired mode.
- */
- result = 0;
- access = SMB_I(inode)->access;
- if (access != wish && access != SMB_O_RDWR) {
- PARANOIA("%s/%s access denied, access=%x, wish=%x\n",
- DENTRY_PATH(dentry), access, wish);
- result = -EACCES;
- }
-out:
- return result;
-}
-
-static int
-smb_proc_close(struct smb_sb_info *server, __u16 fileid, __u32 mtime)
-{
- struct smb_request *req;
- int result = -ENOMEM;
-
- if (! (req = smb_alloc_request(server, 0)))
- goto out;
-
- smb_setup_header(req, SMBclose, 3, 0);
- WSET(req->rq_header, smb_vwv0, fileid);
- DSET(req->rq_header, smb_vwv1, utc2local(server, mtime));
- req->rq_flags |= SMB_REQ_NORETRY;
- result = smb_request_ok(req, SMBclose, 0, 0);
-
- smb_rput(req);
-out:
- return result;
-}
-
-/*
- * Win NT 4.0 has an apparent bug in that it fails to update the
- * modify time when writing to a file. As a workaround, we update
- * both modify and access time locally, and post the times to the
- * server when closing the file.
- */
-static int
-smb_proc_close_inode(struct smb_sb_info *server, struct inode * ino)
-{
- struct smb_inode_info *ei = SMB_I(ino);
- int result = 0;
- if (smb_is_open(ino))
- {
- /*
- * We clear the open flag in advance, in case another
- * process observes the value while we block below.
- */
- ei->open = 0;
-
- /*
- * Kludge alert: SMB timestamps are accurate only to
- * two seconds ... round the times to avoid needless
- * cache invalidations!
- */
- if (ino->i_mtime.tv_sec & 1) {
- ino->i_mtime.tv_sec--;
- ino->i_mtime.tv_nsec = 0;
- }
- if (ino->i_atime.tv_sec & 1) {
- ino->i_atime.tv_sec--;
- ino->i_atime.tv_nsec = 0;
- }
- /*
- * If the file is open with write permissions,
- * update the time stamps to sync mtime and atime.
- */
- if ((server->opt.capabilities & SMB_CAP_UNIX) == 0 &&
- (server->opt.protocol >= SMB_PROTOCOL_LANMAN2) &&
- !(ei->access == SMB_O_RDONLY))
- {
- struct smb_fattr fattr;
- smb_get_inode_attr(ino, &fattr);
- smb_proc_setattr_ext(server, ino, &fattr);
- }
-
- result = smb_proc_close(server, ei->fileid, ino->i_mtime.tv_sec);
- /*
- * Force a revalidation after closing ... some servers
- * don't post the size until the file has been closed.
- */
- if (server->opt.protocol < SMB_PROTOCOL_NT1)
- ei->oldmtime = 0;
- ei->closed = jiffies;
- }
- return result;
-}
-
-int
-smb_close(struct inode *ino)
-{
- int result = 0;
-
- if (smb_is_open(ino)) {
- struct smb_sb_info *server = server_from_inode(ino);
- result = smb_proc_close_inode(server, ino);
- }
- return result;
-}
-
-/*
- * This is used to close a file following a failed instantiate.
- * Since we don't have an inode, we can't use any of the above.
- */
-int
-smb_close_fileid(struct dentry *dentry, __u16 fileid)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
- int result;
-
- result = smb_proc_close(server, fileid, get_seconds());
- return result;
-}
-
-/* In smb_proc_read and smb_proc_write we do not retry, because the
- file-id would not be valid after a reconnection. */
-
-static void
-smb_proc_read_data(struct smb_request *req)
-{
- req->rq_iov[0].iov_base = req->rq_buffer;
- req->rq_iov[0].iov_len = 3;
-
- req->rq_iov[1].iov_base = req->rq_page;
- req->rq_iov[1].iov_len = req->rq_rsize;
- req->rq_iovlen = 2;
-
- req->rq_rlen = smb_len(req->rq_header) + 4 - req->rq_bytes_recvd;
-}
-
-static int
-smb_proc_read(struct inode *inode, loff_t offset, int count, char *data)
-{
- struct smb_sb_info *server = server_from_inode(inode);
- __u16 returned_count, data_len;
- unsigned char *buf;
- int result;
- struct smb_request *req;
- u8 rbuf[4];
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 0)))
- goto out;
-
- smb_setup_header(req, SMBread, 5, 0);
- buf = req->rq_header;
- WSET(buf, smb_vwv0, SMB_I(inode)->fileid);
- WSET(buf, smb_vwv1, count);
- DSET(buf, smb_vwv2, offset);
- WSET(buf, smb_vwv4, 0);
-
- req->rq_page = data;
- req->rq_rsize = count;
- req->rq_callback = smb_proc_read_data;
- req->rq_buffer = rbuf;
- req->rq_flags |= SMB_REQ_NORETRY | SMB_REQ_STATIC;
-
- result = smb_request_ok(req, SMBread, 5, -1);
- if (result < 0)
- goto out_free;
- returned_count = WVAL(req->rq_header, smb_vwv0);
-
- data_len = WVAL(rbuf, 1);
-
- if (returned_count != data_len) {
- printk(KERN_NOTICE "smb_proc_read: returned != data_len\n");
- printk(KERN_NOTICE "smb_proc_read: ret_c=%d, data_len=%d\n",
- returned_count, data_len);
- }
- result = data_len;
-
-out_free:
- smb_rput(req);
-out:
- VERBOSE("ino=%ld, fileid=%d, count=%d, result=%d\n",
- inode->i_ino, SMB_I(inode)->fileid, count, result);
- return result;
-}
-
-static int
-smb_proc_write(struct inode *inode, loff_t offset, int count, const char *data)
-{
- struct smb_sb_info *server = server_from_inode(inode);
- int result;
- u16 fileid = SMB_I(inode)->fileid;
- u8 buf[4];
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 0)))
- goto out;
-
- VERBOSE("ino=%ld, fileid=%d, count=%d@%Ld\n",
- inode->i_ino, fileid, count, offset);
-
- smb_setup_header(req, SMBwrite, 5, count + 3);
- WSET(req->rq_header, smb_vwv0, fileid);
- WSET(req->rq_header, smb_vwv1, count);
- DSET(req->rq_header, smb_vwv2, offset);
- WSET(req->rq_header, smb_vwv4, 0);
-
- buf[0] = 1;
- WSET(buf, 1, count); /* yes, again ... */
- req->rq_iov[1].iov_base = buf;
- req->rq_iov[1].iov_len = 3;
- req->rq_iov[2].iov_base = (char *) data;
- req->rq_iov[2].iov_len = count;
- req->rq_iovlen = 3;
- req->rq_flags |= SMB_REQ_NORETRY;
-
- result = smb_request_ok(req, SMBwrite, 1, 0);
- if (result >= 0)
- result = WVAL(req->rq_header, smb_vwv0);
-
- smb_rput(req);
-out:
- return result;
-}
-
-/*
- * In smb_proc_readX and smb_proc_writeX we do not retry, because the
- * file-id would not be valid after a reconnection.
- */
-
-#define SMB_READX_MAX_PAD 64
-static void
-smb_proc_readX_data(struct smb_request *req)
-{
- /* header length, excluding the netbios length (-4) */
- int hdrlen = SMB_HEADER_LEN + req->rq_resp_wct*2 - 2;
- int data_off = WVAL(req->rq_header, smb_vwv6);
-
- /*
- * Some genius made the padding to the data bytes arbitrary.
- * So we must first calculate the amount of padding used by the server.
- */
- data_off -= hdrlen;
- if (data_off > SMB_READX_MAX_PAD || data_off < 0) {
- PARANOIA("offset is larger than SMB_READX_MAX_PAD or negative!\n");
- PARANOIA("%d > %d || %d < 0\n", data_off, SMB_READX_MAX_PAD, data_off);
- req->rq_rlen = req->rq_bufsize + 1;
- return;
- }
- req->rq_iov[0].iov_base = req->rq_buffer;
- req->rq_iov[0].iov_len = data_off;
-
- req->rq_iov[1].iov_base = req->rq_page;
- req->rq_iov[1].iov_len = req->rq_rsize;
- req->rq_iovlen = 2;
-
- req->rq_rlen = smb_len(req->rq_header) + 4 - req->rq_bytes_recvd;
-}
-
-static int
-smb_proc_readX(struct inode *inode, loff_t offset, int count, char *data)
-{
- struct smb_sb_info *server = server_from_inode(inode);
- unsigned char *buf;
- int result;
- struct smb_request *req;
- static char pad[SMB_READX_MAX_PAD];
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 0)))
- goto out;
-
- smb_setup_header(req, SMBreadX, 12, 0);
- buf = req->rq_header;
- WSET(buf, smb_vwv0, 0x00ff);
- WSET(buf, smb_vwv1, 0);
- WSET(buf, smb_vwv2, SMB_I(inode)->fileid);
- DSET(buf, smb_vwv3, (u32)offset); /* low 32 bits */
- WSET(buf, smb_vwv5, count);
- WSET(buf, smb_vwv6, 0);
- DSET(buf, smb_vwv7, 0);
- WSET(buf, smb_vwv9, 0);
- DSET(buf, smb_vwv10, (u32)(offset >> 32)); /* high 32 bits */
- WSET(buf, smb_vwv11, 0);
-
- req->rq_page = data;
- req->rq_rsize = count;
- req->rq_callback = smb_proc_readX_data;
- req->rq_buffer = pad;
- req->rq_bufsize = SMB_READX_MAX_PAD;
- req->rq_flags |= SMB_REQ_STATIC | SMB_REQ_NORETRY;
-
- result = smb_request_ok(req, SMBreadX, 12, -1);
- if (result < 0)
- goto out_free;
- result = WVAL(req->rq_header, smb_vwv5);
-
-out_free:
- smb_rput(req);
-out:
- VERBOSE("ino=%ld, fileid=%d, count=%d, result=%d\n",
- inode->i_ino, SMB_I(inode)->fileid, count, result);
- return result;
-}
-
-static int
-smb_proc_writeX(struct inode *inode, loff_t offset, int count, const char *data)
-{
- struct smb_sb_info *server = server_from_inode(inode);
- int result;
- u8 *p;
- static u8 pad[4];
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 0)))
- goto out;
-
- VERBOSE("ino=%ld, fileid=%d, count=%d@%Ld\n",
- inode->i_ino, SMB_I(inode)->fileid, count, offset);
-
- p = smb_setup_header(req, SMBwriteX, 14, count + 1);
- WSET(req->rq_header, smb_vwv0, 0x00ff);
- WSET(req->rq_header, smb_vwv1, 0);
- WSET(req->rq_header, smb_vwv2, SMB_I(inode)->fileid);
- DSET(req->rq_header, smb_vwv3, (u32)offset); /* low 32 bits */
- DSET(req->rq_header, smb_vwv5, 0);
- WSET(req->rq_header, smb_vwv7, 0); /* write mode */
- WSET(req->rq_header, smb_vwv8, 0);
- WSET(req->rq_header, smb_vwv9, 0);
- WSET(req->rq_header, smb_vwv10, count); /* data length */
- WSET(req->rq_header, smb_vwv11, smb_vwv12 + 2 + 1);
- DSET(req->rq_header, smb_vwv12, (u32)(offset >> 32));
-
- req->rq_iov[1].iov_base = pad;
- req->rq_iov[1].iov_len = 1;
- req->rq_iov[2].iov_base = (char *) data;
- req->rq_iov[2].iov_len = count;
- req->rq_iovlen = 3;
- req->rq_flags |= SMB_REQ_NORETRY;
-
- result = smb_request_ok(req, SMBwriteX, 6, 0);
- if (result >= 0)
- result = WVAL(req->rq_header, smb_vwv2);
-
- smb_rput(req);
-out:
- return result;
-}
-
-int
-smb_proc_create(struct dentry *dentry, __u16 attr, time_t ctime, __u16 *fileid)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
- char *p;
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- p = smb_setup_header(req, SMBcreate, 3, 0);
- WSET(req->rq_header, smb_vwv0, attr);
- DSET(req->rq_header, smb_vwv1, utc2local(server, ctime));
- result = smb_simple_encode_path(req, &p, dentry, NULL);
- if (result < 0)
- goto out_free;
- smb_setup_bcc(req, p);
-
- result = smb_request_ok(req, SMBcreate, 1, 0);
- if (result < 0)
- goto out_free;
-
- *fileid = WVAL(req->rq_header, smb_vwv0);
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-int
-smb_proc_mv(struct dentry *old_dentry, struct dentry *new_dentry)
-{
- struct smb_sb_info *server = server_from_dentry(old_dentry);
- char *p;
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- p = smb_setup_header(req, SMBmv, 1, 0);
- WSET(req->rq_header, smb_vwv0, aSYSTEM | aHIDDEN | aDIR);
- result = smb_simple_encode_path(req, &p, old_dentry, NULL);
- if (result < 0)
- goto out_free;
- result = smb_simple_encode_path(req, &p, new_dentry, NULL);
- if (result < 0)
- goto out_free;
- smb_setup_bcc(req, p);
-
- if ((result = smb_request_ok(req, SMBmv, 0, 0)) < 0)
- goto out_free;
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-/*
- * Code common to mkdir and rmdir.
- */
-static int
-smb_proc_generic_command(struct dentry *dentry, __u8 command)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
- char *p;
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- p = smb_setup_header(req, command, 0, 0);
- result = smb_simple_encode_path(req, &p, dentry, NULL);
- if (result < 0)
- goto out_free;
- smb_setup_bcc(req, p);
-
- result = smb_request_ok(req, command, 0, 0);
- if (result < 0)
- goto out_free;
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-int
-smb_proc_mkdir(struct dentry *dentry)
-{
- return smb_proc_generic_command(dentry, SMBmkdir);
-}
-
-int
-smb_proc_rmdir(struct dentry *dentry)
-{
- return smb_proc_generic_command(dentry, SMBrmdir);
-}
-
-#if SMBFS_POSIX_UNLINK
-/*
- * Removes readonly attribute from a file. Used by unlink to give posix
- * semantics.
- */
-static int
-smb_set_rw(struct dentry *dentry,struct smb_sb_info *server)
-{
- int result;
- struct smb_fattr fattr;
-
- /* FIXME: cifsUE should allow removing a readonly file. */
-
- /* first get current attribute */
- smb_init_dirent(server, &fattr);
- result = server->ops->getattr(server, dentry, &fattr);
- smb_finish_dirent(server, &fattr);
- if (result < 0)
- return result;
-
- /* if RONLY attribute is set, remove it */
- if (fattr.attr & aRONLY) { /* read only attribute is set */
- fattr.attr &= ~aRONLY;
- result = smb_proc_setattr_core(server, dentry, fattr.attr);
- }
- return result;
-}
-#endif
-
-int
-smb_proc_unlink(struct dentry *dentry)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
- int flag = 0;
- char *p;
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- retry:
- p = smb_setup_header(req, SMBunlink, 1, 0);
- WSET(req->rq_header, smb_vwv0, aSYSTEM | aHIDDEN);
- result = smb_simple_encode_path(req, &p, dentry, NULL);
- if (result < 0)
- goto out_free;
- smb_setup_bcc(req, p);
-
- if ((result = smb_request_ok(req, SMBunlink, 0, 0)) < 0) {
-#if SMBFS_POSIX_UNLINK
- if (result == -EACCES && !flag) {
- /* Posix semantics is for the read-only state
- of a file to be ignored in unlink(). In the
- SMB world a unlink() is refused on a
- read-only file. To make things easier for
- unix users we try to override the files
- permission if the unlink fails with the
- right error.
- This introduces a race condition that could
- lead to a file being written by someone who
- shouldn't have access, but as far as I can
- tell that is unavoidable */
-
- /* remove RONLY attribute and try again */
- result = smb_set_rw(dentry,server);
- if (result == 0) {
- flag = 1;
- req->rq_flags = 0;
- goto retry;
- }
- }
-#endif
- goto out_free;
- }
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-int
-smb_proc_flush(struct smb_sb_info *server, __u16 fileid)
-{
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 0)))
- goto out;
-
- smb_setup_header(req, SMBflush, 1, 0);
- WSET(req->rq_header, smb_vwv0, fileid);
- req->rq_flags |= SMB_REQ_NORETRY;
- result = smb_request_ok(req, SMBflush, 0, 0);
-
- smb_rput(req);
-out:
- return result;
-}
-
-static int
-smb_proc_trunc32(struct inode *inode, loff_t length)
-{
- /*
- * Writing 0bytes is old-SMB magic for truncating files.
- * MAX_NON_LFS should prevent this from being called with a too
- * large offset.
- */
- return smb_proc_write(inode, length, 0, NULL);
-}
-
-static int
-smb_proc_trunc64(struct inode *inode, loff_t length)
-{
- struct smb_sb_info *server = server_from_inode(inode);
- int result;
- char *param;
- char *data;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 14)))
- goto out;
-
- param = req->rq_buffer;
- data = req->rq_buffer + 6;
-
- /* FIXME: must we also set allocation size? winNT seems to do that */
- WSET(param, 0, SMB_I(inode)->fileid);
- WSET(param, 2, SMB_SET_FILE_END_OF_FILE_INFO);
- WSET(param, 4, 0);
- LSET(data, 0, length);
-
- req->rq_trans2_command = TRANSACT2_SETFILEINFO;
- req->rq_ldata = 8;
- req->rq_data = data;
- req->rq_lparm = 6;
- req->rq_parm = param;
- req->rq_flags |= SMB_REQ_NORETRY;
- result = smb_add_request(req);
- if (result < 0)
- goto out_free;
-
- result = 0;
- if (req->rq_rcls != 0)
- result = smb_errno(req);
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-static int
-smb_proc_trunc95(struct inode *inode, loff_t length)
-{
- struct smb_sb_info *server = server_from_inode(inode);
- int result = smb_proc_trunc32(inode, length);
-
- /*
- * win9x doesn't appear to update the size immediately.
- * It will return the old file size after the truncate,
- * confusing smbfs. So we force an update.
- *
- * FIXME: is this still necessary?
- */
- smb_proc_flush(server, SMB_I(inode)->fileid);
- return result;
-}
-
-static void
-smb_init_dirent(struct smb_sb_info *server, struct smb_fattr *fattr)
-{
- memset(fattr, 0, sizeof(*fattr));
-
- fattr->f_nlink = 1;
- fattr->f_uid = server->mnt->uid;
- fattr->f_gid = server->mnt->gid;
- fattr->f_unix = 0;
-}
-
-static void
-smb_finish_dirent(struct smb_sb_info *server, struct smb_fattr *fattr)
-{
- if (fattr->f_unix)
- return;
-
- fattr->f_mode = server->mnt->file_mode;
- if (fattr->attr & aDIR) {
- fattr->f_mode = server->mnt->dir_mode;
- fattr->f_size = SMB_ST_BLKSIZE;
- }
- /* Check the read-only flag */
- if (fattr->attr & aRONLY)
- fattr->f_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
-
- /* How many 512 byte blocks do we need for this file? */
- fattr->f_blocks = 0;
- if (fattr->f_size != 0)
- fattr->f_blocks = 1 + ((fattr->f_size-1) >> 9);
- return;
-}
-
-void
-smb_init_root_dirent(struct smb_sb_info *server, struct smb_fattr *fattr,
- struct super_block *sb)
-{
- smb_init_dirent(server, fattr);
- fattr->attr = aDIR;
- fattr->f_ino = 2; /* traditional root inode number */
- fattr->f_mtime = current_fs_time(sb);
- smb_finish_dirent(server, fattr);
-}
-
-/*
- * Decode a dirent for old protocols
- *
- * qname is filled with the decoded, and possibly translated, name.
- * fattr receives decoded attributes
- *
- * Bugs Noted:
- * (1) Pathworks servers may pad the name with extra spaces.
- */
-static char *
-smb_decode_short_dirent(struct smb_sb_info *server, char *p,
- struct qstr *qname, struct smb_fattr *fattr,
- unsigned char *name_buf)
-{
- int len;
-
- /*
- * SMB doesn't have a concept of inode numbers ...
- */
- smb_init_dirent(server, fattr);
- fattr->f_ino = 0; /* FIXME: do we need this? */
-
- p += SMB_STATUS_SIZE; /* reserved (search_status) */
- fattr->attr = *p;
- fattr->f_mtime.tv_sec = date_dos2unix(server, WVAL(p, 3), WVAL(p, 1));
- fattr->f_mtime.tv_nsec = 0;
- fattr->f_size = DVAL(p, 5);
- fattr->f_ctime = fattr->f_mtime;
- fattr->f_atime = fattr->f_mtime;
- qname->name = p + 9;
- len = strnlen(qname->name, 12);
-
- /*
- * Trim trailing blanks for Pathworks servers
- */
- while (len > 2 && qname->name[len-1] == ' ')
- len--;
-
- smb_finish_dirent(server, fattr);
-
-#if 0
- /* FIXME: These only work for ascii chars, and recent smbmount doesn't
- allow the flag to be set anyway. It kills const. Remove? */
- switch (server->opt.case_handling) {
- case SMB_CASE_UPPER:
- str_upper(entry->name, len);
- break;
- case SMB_CASE_LOWER:
- str_lower(entry->name, len);
- break;
- default:
- break;
- }
-#endif
-
- qname->len = 0;
- len = server->ops->convert(name_buf, SMB_MAXNAMELEN,
- qname->name, len,
- server->remote_nls, server->local_nls);
- if (len > 0) {
- qname->len = len;
- qname->name = name_buf;
- DEBUG1("len=%d, name=%.*s\n",qname->len,qname->len,qname->name);
- }
-
- return p + 22;
-}
-
-/*
- * This routine is used to read in directory entries from the network.
- * Note that it is for short directory name seeks, i.e.: protocol <
- * SMB_PROTOCOL_LANMAN2
- */
-static int
-smb_proc_readdir_short(struct file *filp, void *dirent, filldir_t filldir,
- struct smb_cache_control *ctl)
-{
- struct dentry *dir = filp->f_path.dentry;
- struct smb_sb_info *server = server_from_dentry(dir);
- struct qstr qname;
- struct smb_fattr fattr;
- char *p;
- int result;
- int i, first, entries_seen, entries;
- int entries_asked = (server->opt.max_xmit - 100) / SMB_DIRINFO_SIZE;
- __u16 bcc;
- __u16 count;
- char status[SMB_STATUS_SIZE];
- static struct qstr mask = {
- .name = "*.*",
- .len = 3,
- };
- unsigned char *last_status;
- struct smb_request *req;
- unsigned char *name_buf;
-
- VERBOSE("%s/%s\n", DENTRY_PATH(dir));
-
- lock_kernel();
-
- result = -ENOMEM;
- if (! (name_buf = kmalloc(SMB_MAXNAMELEN, GFP_KERNEL)))
- goto out;
-
- first = 1;
- entries = 0;
- entries_seen = 2; /* implicit . and .. */
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, server->opt.max_xmit)))
- goto out_name;
-
- while (1) {
- p = smb_setup_header(req, SMBsearch, 2, 0);
- WSET(req->rq_header, smb_vwv0, entries_asked);
- WSET(req->rq_header, smb_vwv1, aDIR);
- if (first == 1) {
- result = smb_simple_encode_path(req, &p, dir, &mask);
- if (result < 0)
- goto out_free;
- if (p + 3 > (char *)req->rq_buffer + req->rq_bufsize) {
- result = -ENAMETOOLONG;
- goto out_free;
- }
- *p++ = 5;
- WSET(p, 0, 0);
- p += 2;
- first = 0;
- } else {
- if (p + 5 + SMB_STATUS_SIZE >
- (char *)req->rq_buffer + req->rq_bufsize) {
- result = -ENAMETOOLONG;
- goto out_free;
- }
-
- *p++ = 4;
- *p++ = 0;
- *p++ = 5;
- WSET(p, 0, SMB_STATUS_SIZE);
- p += 2;
- memcpy(p, status, SMB_STATUS_SIZE);
- p += SMB_STATUS_SIZE;
- }
-
- smb_setup_bcc(req, p);
-
- result = smb_request_ok(req, SMBsearch, 1, -1);
- if (result < 0) {
- if ((req->rq_rcls == ERRDOS) &&
- (req->rq_err == ERRnofiles))
- break;
- goto out_free;
- }
- count = WVAL(req->rq_header, smb_vwv0);
- if (count <= 0)
- break;
-
- result = -EIO;
- bcc = smb_bcc(req->rq_header);
- if (bcc != count * SMB_DIRINFO_SIZE + 3)
- goto out_free;
- p = req->rq_buffer + 3;
-
-
- /* Make sure the response fits in the buffer. Fixed sized
- entries means we don't have to check in the decode loop. */
-
- last_status = req->rq_buffer + 3 + (count-1) * SMB_DIRINFO_SIZE;
-
- if (last_status + SMB_DIRINFO_SIZE >=
- req->rq_buffer + req->rq_bufsize) {
- printk(KERN_ERR "smb_proc_readdir_short: "
- "last dir entry outside buffer! "
- "%d@%p %d@%p\n", SMB_DIRINFO_SIZE, last_status,
- req->rq_bufsize, req->rq_buffer);
- goto out_free;
- }
-
- /* Read the last entry into the status field. */
- memcpy(status, last_status, SMB_STATUS_SIZE);
-
-
- /* Now we are ready to parse smb directory entries. */
-
- for (i = 0; i < count; i++) {
- p = smb_decode_short_dirent(server, p,
- &qname, &fattr, name_buf);
- if (qname.len == 0)
- continue;
-
- if (entries_seen == 2 && qname.name[0] == '.') {
- if (qname.len == 1)
- continue;
- if (qname.name[1] == '.' && qname.len == 2)
- continue;
- }
- if (!smb_fill_cache(filp, dirent, filldir, ctl,
- &qname, &fattr))
- ; /* stop reading? */
- entries_seen++;
- }
- }
- result = entries;
-
-out_free:
- smb_rput(req);
-out_name:
- kfree(name_buf);
-out:
- unlock_kernel();
- return result;
-}
-
-static void smb_decode_unix_basic(struct smb_fattr *fattr, struct smb_sb_info *server, char *p)
-{
- u64 size, disk_bytes;
-
- /* FIXME: verify nls support. all is sent as utf8? */
-
- fattr->f_unix = 1;
- fattr->f_mode = 0;
-
- /* FIXME: use the uniqueID from the remote instead? */
- /* 0 L file size in bytes */
- /* 8 L file size on disk in bytes (block count) */
- /* 40 L uid */
- /* 48 L gid */
- /* 56 W file type */
- /* 60 L devmajor */
- /* 68 L devminor */
- /* 76 L unique ID (inode) */
- /* 84 L permissions */
- /* 92 L link count */
-
- size = LVAL(p, 0);
- disk_bytes = LVAL(p, 8);
-
- /*
- * Some samba versions round up on-disk byte usage
- * to 1MB boundaries, making it useless. When seeing
- * that, use the size instead.
- */
- if (!(disk_bytes & 0xfffff))
- disk_bytes = size+511;
-
- fattr->f_size = size;
- fattr->f_blocks = disk_bytes >> 9;
- fattr->f_ctime = smb_ntutc2unixutc(LVAL(p, 16));
- fattr->f_atime = smb_ntutc2unixutc(LVAL(p, 24));
- fattr->f_mtime = smb_ntutc2unixutc(LVAL(p, 32));
-
- if (server->mnt->flags & SMB_MOUNT_UID)
- fattr->f_uid = server->mnt->uid;
- else
- fattr->f_uid = LVAL(p, 40);
-
- if (server->mnt->flags & SMB_MOUNT_GID)
- fattr->f_gid = server->mnt->gid;
- else
- fattr->f_gid = LVAL(p, 48);
-
- fattr->f_mode |= smb_filetype_to_mode(WVAL(p, 56));
-
- if (S_ISBLK(fattr->f_mode) || S_ISCHR(fattr->f_mode)) {
- __u64 major = LVAL(p, 60);
- __u64 minor = LVAL(p, 68);
-
- fattr->f_rdev = MKDEV(major & 0xffffffff, minor & 0xffffffff);
- if (MAJOR(fattr->f_rdev) != (major & 0xffffffff) ||
- MINOR(fattr->f_rdev) != (minor & 0xffffffff))
- fattr->f_rdev = 0;
- }
-
- fattr->f_mode |= LVAL(p, 84);
-
- if ( (server->mnt->flags & SMB_MOUNT_DMODE) &&
- (S_ISDIR(fattr->f_mode)) )
- fattr->f_mode = (server->mnt->dir_mode & S_IRWXUGO) | S_IFDIR;
- else if ( (server->mnt->flags & SMB_MOUNT_FMODE) &&
- !(S_ISDIR(fattr->f_mode)) )
- fattr->f_mode = (server->mnt->file_mode & S_IRWXUGO) |
- (fattr->f_mode & S_IFMT);
-
-}
-
-/*
- * Interpret a long filename structure using the specified info level:
- * level 1 for anything below NT1 protocol
- * level 260 for NT1 protocol
- *
- * qname is filled with the decoded, and possibly translated, name
- * fattr receives decoded attributes.
- *
- * Bugs Noted:
- * (1) Win NT 4.0 appends a null byte to names and counts it in the length!
- */
-static char *
-smb_decode_long_dirent(struct smb_sb_info *server, char *p, int level,
- struct qstr *qname, struct smb_fattr *fattr,
- unsigned char *name_buf)
-{
- char *result;
- unsigned int len = 0;
- int n;
- __u16 date, time;
- int unicode = (server->mnt->flags & SMB_MOUNT_UNICODE);
-
- /*
- * SMB doesn't have a concept of inode numbers ...
- */
- smb_init_dirent(server, fattr);
- fattr->f_ino = 0; /* FIXME: do we need this? */
-
- switch (level) {
- case 1:
- len = *((unsigned char *) p + 22);
- qname->name = p + 23;
- result = p + 24 + len;
-
- date = WVAL(p, 0);
- time = WVAL(p, 2);
- fattr->f_ctime.tv_sec = date_dos2unix(server, date, time);
- fattr->f_ctime.tv_nsec = 0;
-
- date = WVAL(p, 4);
- time = WVAL(p, 6);
- fattr->f_atime.tv_sec = date_dos2unix(server, date, time);
- fattr->f_atime.tv_nsec = 0;
-
- date = WVAL(p, 8);
- time = WVAL(p, 10);
- fattr->f_mtime.tv_sec = date_dos2unix(server, date, time);
- fattr->f_mtime.tv_nsec = 0;
- fattr->f_size = DVAL(p, 12);
- /* ULONG allocation size */
- fattr->attr = WVAL(p, 20);
-
- VERBOSE("info 1 at %p, len=%d, name=%.*s\n",
- p, len, len, qname->name);
- break;
- case 260:
- result = p + WVAL(p, 0);
- len = DVAL(p, 60);
- if (len > 255) len = 255;
- /* NT4 null terminates, unless we are using unicode ... */
- qname->name = p + 94;
- if (!unicode && len && qname->name[len-1] == '\0')
- len--;
-
- fattr->f_ctime = smb_ntutc2unixutc(LVAL(p, 8));
- fattr->f_atime = smb_ntutc2unixutc(LVAL(p, 16));
- fattr->f_mtime = smb_ntutc2unixutc(LVAL(p, 24));
- /* change time (32) */
- fattr->f_size = LVAL(p, 40);
- /* alloc size (48) */
- fattr->attr = DVAL(p, 56);
-
- VERBOSE("info 260 at %p, len=%d, name=%.*s\n",
- p, len, len, qname->name);
- break;
- case SMB_FIND_FILE_UNIX:
- result = p + WVAL(p, 0);
- qname->name = p + 108;
-
- len = strlen(qname->name);
- /* FIXME: should we check the length?? */
-
- p += 8;
- smb_decode_unix_basic(fattr, server, p);
- VERBOSE("info SMB_FIND_FILE_UNIX at %p, len=%d, name=%.*s\n",
- p, len, len, qname->name);
- break;
- default:
- PARANOIA("Unknown info level %d\n", level);
- result = p + WVAL(p, 0);
- goto out;
- }
-
- smb_finish_dirent(server, fattr);
-
-#if 0
- /* FIXME: These only work for ascii chars, and recent smbmount doesn't
- allow the flag to be set anyway. Remove? */
- switch (server->opt.case_handling) {
- case SMB_CASE_UPPER:
- str_upper(qname->name, len);
- break;
- case SMB_CASE_LOWER:
- str_lower(qname->name, len);
- break;
- default:
- break;
- }
-#endif
-
- qname->len = 0;
- n = server->ops->convert(name_buf, SMB_MAXNAMELEN,
- qname->name, len,
- server->remote_nls, server->local_nls);
- if (n > 0) {
- qname->len = n;
- qname->name = name_buf;
- }
-
-out:
- return result;
-}
-
-/* findfirst/findnext flags */
-#define SMB_CLOSE_AFTER_FIRST (1<<0)
-#define SMB_CLOSE_IF_END (1<<1)
-#define SMB_REQUIRE_RESUME_KEY (1<<2)
-#define SMB_CONTINUE_BIT (1<<3)
-
-/*
- * Note: samba-2.0.7 (at least) has a very similar routine, cli_list, in
- * source/libsmb/clilist.c. When looking for smb bugs in the readdir code,
- * go there for advise.
- *
- * Bugs Noted:
- * (1) When using Info Level 1 Win NT 4.0 truncates directory listings
- * for certain patterns of names and/or lengths. The breakage pattern
- * is completely reproducible and can be toggled by the creation of a
- * single file. (E.g. echo hi >foo breaks, rm -f foo works.)
- */
-static int
-smb_proc_readdir_long(struct file *filp, void *dirent, filldir_t filldir,
- struct smb_cache_control *ctl)
-{
- struct dentry *dir = filp->f_path.dentry;
- struct smb_sb_info *server = server_from_dentry(dir);
- struct qstr qname;
- struct smb_fattr fattr;
-
- unsigned char *p, *lastname;
- char *mask, *param;
- __u16 command;
- int first, entries_seen;
-
- /* Both NT and OS/2 accept info level 1 (but see note below). */
- int info_level = 260;
- const int max_matches = 512;
-
- unsigned int ff_searchcount = 0;
- unsigned int ff_eos = 0;
- unsigned int ff_lastname = 0;
- unsigned int ff_dir_handle = 0;
- unsigned int loop_count = 0;
- unsigned int mask_len, i;
- int result;
- struct smb_request *req;
- unsigned char *name_buf;
- static struct qstr star = {
- .name = "*",
- .len = 1,
- };
-
- lock_kernel();
-
- /*
- * We always prefer unix style. Use info level 1 for older
- * servers that don't do 260.
- */
- if (server->opt.capabilities & SMB_CAP_UNIX)
- info_level = SMB_FIND_FILE_UNIX;
- else if (server->opt.protocol < SMB_PROTOCOL_NT1)
- info_level = 1;
-
- result = -ENOMEM;
- if (! (name_buf = kmalloc(SMB_MAXNAMELEN+2, GFP_KERNEL)))
- goto out;
- if (! (req = smb_alloc_request(server, server->opt.max_xmit)))
- goto out_name;
- param = req->rq_buffer;
-
- /*
- * Encode the initial path
- */
- mask = param + 12;
-
- result = smb_encode_path(server, mask, SMB_MAXPATHLEN+1, dir, &star);
- if (result <= 0)
- goto out_free;
- mask_len = result - 1; /* mask_len is strlen, not #bytes */
- result = 0;
- first = 1;
- VERBOSE("starting mask_len=%d, mask=%s\n", mask_len, mask);
-
- entries_seen = 2;
- ff_eos = 0;
-
- while (ff_eos == 0) {
- loop_count += 1;
- if (loop_count > 10) {
- printk(KERN_WARNING "smb_proc_readdir_long: "
- "Looping in FIND_NEXT??\n");
- result = -EIO;
- break;
- }
-
- if (first != 0) {
- command = TRANSACT2_FINDFIRST;
- WSET(param, 0, aSYSTEM | aHIDDEN | aDIR);
- WSET(param, 2, max_matches); /* max count */
- WSET(param, 4, SMB_CLOSE_IF_END);
- WSET(param, 6, info_level);
- DSET(param, 8, 0);
- } else {
- command = TRANSACT2_FINDNEXT;
-
- VERBOSE("handle=0x%X, lastname=%d, mask=%.*s\n",
- ff_dir_handle, ff_lastname, mask_len, mask);
-
- WSET(param, 0, ff_dir_handle); /* search handle */
- WSET(param, 2, max_matches); /* max count */
- WSET(param, 4, info_level);
- DSET(param, 6, 0);
- WSET(param, 10, SMB_CONTINUE_BIT|SMB_CLOSE_IF_END);
- }
-
- req->rq_trans2_command = command;
- req->rq_ldata = 0;
- req->rq_data = NULL;
- req->rq_lparm = 12 + mask_len + 1;
- req->rq_parm = param;
- req->rq_flags = 0;
- result = smb_add_request(req);
- if (result < 0) {
- PARANOIA("error=%d, breaking\n", result);
- break;
- }
-
- if (req->rq_rcls == ERRSRV && req->rq_err == ERRerror) {
- /* a damn Win95 bug - sometimes it clags if you
- ask it too fast */
- schedule_timeout_interruptible(msecs_to_jiffies(200));
- continue;
- }
-
- if (req->rq_rcls != 0) {
- result = smb_errno(req);
- PARANOIA("name=%s, result=%d, rcls=%d, err=%d\n",
- mask, result, req->rq_rcls, req->rq_err);
- break;
- }
-
- /* parse out some important return info */
- if (first != 0) {
- ff_dir_handle = WVAL(req->rq_parm, 0);
- ff_searchcount = WVAL(req->rq_parm, 2);
- ff_eos = WVAL(req->rq_parm, 4);
- ff_lastname = WVAL(req->rq_parm, 8);
- } else {
- ff_searchcount = WVAL(req->rq_parm, 0);
- ff_eos = WVAL(req->rq_parm, 2);
- ff_lastname = WVAL(req->rq_parm, 6);
- }
-
- if (ff_searchcount == 0)
- break;
-
- /* Now we are ready to parse smb directory entries. */
-
- /* point to the data bytes */
- p = req->rq_data;
- for (i = 0; i < ff_searchcount; i++) {
- /* make sure we stay within the buffer */
- if (p >= req->rq_data + req->rq_ldata) {
- printk(KERN_ERR "smb_proc_readdir_long: "
- "dirent pointer outside buffer! "
- "%p %d@%p\n",
- p, req->rq_ldata, req->rq_data);
- result = -EIO; /* always a comm. error? */
- goto out_free;
- }
-
- p = smb_decode_long_dirent(server, p, info_level,
- &qname, &fattr, name_buf);
-
- /* ignore . and .. from the server */
- if (entries_seen == 2 && qname.name[0] == '.') {
- if (qname.len == 1)
- continue;
- if (qname.name[1] == '.' && qname.len == 2)
- continue;
- }
-
- if (!smb_fill_cache(filp, dirent, filldir, ctl,
- &qname, &fattr))
- ; /* stop reading? */
- entries_seen++;
- }
-
- VERBOSE("received %d entries, eos=%d\n", ff_searchcount,ff_eos);
-
- /*
- * We might need the lastname for continuations.
- *
- * Note that some servers (win95?) point to the filename and
- * others (NT4, Samba using NT1) to the dir entry. We assume
- * here that those who do not point to a filename do not need
- * this info to continue the listing.
- *
- * OS/2 needs this and talks infolevel 1.
- * NetApps want lastname with infolevel 260.
- * win2k want lastname with infolevel 260, and points to
- * the record not to the name.
- * Samba+CifsUnixExt doesn't need lastname.
- *
- * Both are happy if we return the data they point to. So we do.
- * (FIXME: above is not true with win2k)
- */
- mask_len = 0;
- if (info_level != SMB_FIND_FILE_UNIX &&
- ff_lastname > 0 && ff_lastname < req->rq_ldata) {
- lastname = req->rq_data + ff_lastname;
-
- switch (info_level) {
- case 260:
- mask_len = req->rq_ldata - ff_lastname;
- break;
- case 1:
- /* lastname points to a length byte */
- mask_len = *lastname++;
- if (ff_lastname + 1 + mask_len > req->rq_ldata)
- mask_len = req->rq_ldata - ff_lastname - 1;
- break;
- }
-
- /*
- * Update the mask string for the next message.
- */
- if (mask_len > 255)
- mask_len = 255;
- if (mask_len)
- strncpy(mask, lastname, mask_len);
- }
- mask_len = strnlen(mask, mask_len);
- VERBOSE("new mask, len=%d@%d of %d, mask=%.*s\n",
- mask_len, ff_lastname, req->rq_ldata, mask_len, mask);
-
- first = 0;
- loop_count = 0;
- }
-
-out_free:
- smb_rput(req);
-out_name:
- kfree(name_buf);
-out:
- unlock_kernel();
- return result;
-}
-
-/*
- * This version uses the trans2 TRANSACT2_FINDFIRST message
- * to get the attribute data.
- *
- * Bugs Noted:
- */
-static int
-smb_proc_getattr_ff(struct smb_sb_info *server, struct dentry *dentry,
- struct smb_fattr *fattr)
-{
- char *param, *mask;
- __u16 date, time;
- int mask_len, result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
- param = req->rq_buffer;
- mask = param + 12;
-
- mask_len = smb_encode_path(server, mask, SMB_MAXPATHLEN+1, dentry,NULL);
- if (mask_len < 0) {
- result = mask_len;
- goto out_free;
- }
- VERBOSE("name=%s, len=%d\n", mask, mask_len);
- WSET(param, 0, aSYSTEM | aHIDDEN | aDIR);
- WSET(param, 2, 1); /* max count */
- WSET(param, 4, 1); /* close after this call */
- WSET(param, 6, 1); /* info_level */
- DSET(param, 8, 0);
-
- req->rq_trans2_command = TRANSACT2_FINDFIRST;
- req->rq_ldata = 0;
- req->rq_data = NULL;
- req->rq_lparm = 12 + mask_len;
- req->rq_parm = param;
- req->rq_flags = 0;
- result = smb_add_request(req);
- if (result < 0)
- goto out_free;
- if (req->rq_rcls != 0) {
- result = smb_errno(req);
-#ifdef SMBFS_PARANOIA
- if (result != -ENOENT)
- PARANOIA("error for %s, rcls=%d, err=%d\n",
- mask, req->rq_rcls, req->rq_err);
-#endif
- goto out_free;
- }
- /* Make sure we got enough data ... */
- result = -EINVAL;
- if (req->rq_ldata < 22 || WVAL(req->rq_parm, 2) != 1) {
- PARANOIA("bad result for %s, len=%d, count=%d\n",
- mask, req->rq_ldata, WVAL(req->rq_parm, 2));
- goto out_free;
- }
-
- /*
- * Decode the response into the fattr ...
- */
- date = WVAL(req->rq_data, 0);
- time = WVAL(req->rq_data, 2);
- fattr->f_ctime.tv_sec = date_dos2unix(server, date, time);
- fattr->f_ctime.tv_nsec = 0;
-
- date = WVAL(req->rq_data, 4);
- time = WVAL(req->rq_data, 6);
- fattr->f_atime.tv_sec = date_dos2unix(server, date, time);
- fattr->f_atime.tv_nsec = 0;
-
- date = WVAL(req->rq_data, 8);
- time = WVAL(req->rq_data, 10);
- fattr->f_mtime.tv_sec = date_dos2unix(server, date, time);
- fattr->f_mtime.tv_nsec = 0;
- VERBOSE("name=%s, date=%x, time=%x, mtime=%ld\n",
- mask, date, time, fattr->f_mtime.tv_sec);
- fattr->f_size = DVAL(req->rq_data, 12);
- /* ULONG allocation size */
- fattr->attr = WVAL(req->rq_data, 20);
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-static int
-smb_proc_getattr_core(struct smb_sb_info *server, struct dentry *dir,
- struct smb_fattr *fattr)
-{
- int result;
- char *p;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- p = smb_setup_header(req, SMBgetatr, 0, 0);
- result = smb_simple_encode_path(req, &p, dir, NULL);
- if (result < 0)
- goto out_free;
- smb_setup_bcc(req, p);
-
- if ((result = smb_request_ok(req, SMBgetatr, 10, 0)) < 0)
- goto out_free;
- fattr->attr = WVAL(req->rq_header, smb_vwv0);
- fattr->f_mtime.tv_sec = local2utc(server, DVAL(req->rq_header, smb_vwv1));
- fattr->f_mtime.tv_nsec = 0;
- fattr->f_size = DVAL(req->rq_header, smb_vwv3);
- fattr->f_ctime = fattr->f_mtime;
- fattr->f_atime = fattr->f_mtime;
-#ifdef SMBFS_DEBUG_TIMESTAMP
- printk("getattr_core: %s/%s, mtime=%ld\n",
- DENTRY_PATH(dir), fattr->f_mtime);
-#endif
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-/*
- * Bugs Noted:
- * (1) Win 95 swaps the date and time fields in the standard info level.
- */
-static int
-smb_proc_getattr_trans2(struct smb_sb_info *server, struct dentry *dir,
- struct smb_request *req, int infolevel)
-{
- char *p, *param;
- int result;
-
- param = req->rq_buffer;
- WSET(param, 0, infolevel);
- DSET(param, 2, 0);
- result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, dir, NULL);
- if (result < 0)
- goto out;
- p = param + 6 + result;
-
- req->rq_trans2_command = TRANSACT2_QPATHINFO;
- req->rq_ldata = 0;
- req->rq_data = NULL;
- req->rq_lparm = p - param;
- req->rq_parm = param;
- req->rq_flags = 0;
- result = smb_add_request(req);
- if (result < 0)
- goto out;
- if (req->rq_rcls != 0) {
- VERBOSE("for %s: result=%d, rcls=%d, err=%d\n",
- &param[6], result, req->rq_rcls, req->rq_err);
- result = smb_errno(req);
- goto out;
- }
- result = -ENOENT;
- if (req->rq_ldata < 22) {
- PARANOIA("not enough data for %s, len=%d\n",
- &param[6], req->rq_ldata);
- goto out;
- }
-
- result = 0;
-out:
- return result;
-}
-
-static int
-smb_proc_getattr_trans2_std(struct smb_sb_info *server, struct dentry *dir,
- struct smb_fattr *attr)
-{
- u16 date, time;
- int off_date = 0, off_time = 2;
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- result = smb_proc_getattr_trans2(server, dir, req, SMB_INFO_STANDARD);
- if (result < 0)
- goto out_free;
-
- /*
- * Kludge alert: Win 95 swaps the date and time field,
- * contrary to the CIFS docs and Win NT practice.
- */
- if (server->mnt->flags & SMB_MOUNT_WIN95) {
- off_date = 2;
- off_time = 0;
- }
- date = WVAL(req->rq_data, off_date);
- time = WVAL(req->rq_data, off_time);
- attr->f_ctime.tv_sec = date_dos2unix(server, date, time);
- attr->f_ctime.tv_nsec = 0;
-
- date = WVAL(req->rq_data, 4 + off_date);
- time = WVAL(req->rq_data, 4 + off_time);
- attr->f_atime.tv_sec = date_dos2unix(server, date, time);
- attr->f_atime.tv_nsec = 0;
-
- date = WVAL(req->rq_data, 8 + off_date);
- time = WVAL(req->rq_data, 8 + off_time);
- attr->f_mtime.tv_sec = date_dos2unix(server, date, time);
- attr->f_mtime.tv_nsec = 0;
-#ifdef SMBFS_DEBUG_TIMESTAMP
- printk(KERN_DEBUG "getattr_trans2: %s/%s, date=%x, time=%x, mtime=%ld\n",
- DENTRY_PATH(dir), date, time, attr->f_mtime);
-#endif
- attr->f_size = DVAL(req->rq_data, 12);
- attr->attr = WVAL(req->rq_data, 20);
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-static int
-smb_proc_getattr_trans2_all(struct smb_sb_info *server, struct dentry *dir,
- struct smb_fattr *attr)
-{
- struct smb_request *req;
- int result;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- result = smb_proc_getattr_trans2(server, dir, req,
- SMB_QUERY_FILE_ALL_INFO);
- if (result < 0)
- goto out_free;
-
- attr->f_ctime = smb_ntutc2unixutc(LVAL(req->rq_data, 0));
- attr->f_atime = smb_ntutc2unixutc(LVAL(req->rq_data, 8));
- attr->f_mtime = smb_ntutc2unixutc(LVAL(req->rq_data, 16));
- /* change (24) */
- attr->attr = WVAL(req->rq_data, 32);
- /* pad? (34) */
- /* allocated size (40) */
- attr->f_size = LVAL(req->rq_data, 48);
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-static int
-smb_proc_getattr_unix(struct smb_sb_info *server, struct dentry *dir,
- struct smb_fattr *attr)
-{
- struct smb_request *req;
- int result;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- result = smb_proc_getattr_trans2(server, dir, req,
- SMB_QUERY_FILE_UNIX_BASIC);
- if (result < 0)
- goto out_free;
-
- smb_decode_unix_basic(attr, server, req->rq_data);
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-static int
-smb_proc_getattr_95(struct smb_sb_info *server, struct dentry *dir,
- struct smb_fattr *attr)
-{
- struct inode *inode = dir->d_inode;
- int result;
-
- /* FIXME: why not use the "all" version? */
- result = smb_proc_getattr_trans2_std(server, dir, attr);
- if (result < 0)
- goto out;
-
- /*
- * None of the getattr versions here can make win9x return the right
- * filesize if there are changes made to an open file.
- * A seek-to-end does return the right size, but we only need to do
- * that on files we have written.
- */
- if (inode && SMB_I(inode)->flags & SMB_F_LOCALWRITE &&
- smb_is_open(inode))
- {
- __u16 fileid = SMB_I(inode)->fileid;
- attr->f_size = smb_proc_seek(server, fileid, 2, 0);
- }
-
-out:
- return result;
-}
-
-static int
-smb_proc_ops_wait(struct smb_sb_info *server)
-{
- int result;
-
- result = wait_event_interruptible_timeout(server->conn_wq,
- server->conn_complete, 30*HZ);
-
- if (!result || signal_pending(current))
- return -EIO;
-
- return 0;
-}
-
-static int
-smb_proc_getattr_null(struct smb_sb_info *server, struct dentry *dir,
- struct smb_fattr *fattr)
-{
- int result;
-
- if (smb_proc_ops_wait(server) < 0)
- return -EIO;
-
- smb_init_dirent(server, fattr);
- result = server->ops->getattr(server, dir, fattr);
- smb_finish_dirent(server, fattr);
-
- return result;
-}
-
-static int
-smb_proc_readdir_null(struct file *filp, void *dirent, filldir_t filldir,
- struct smb_cache_control *ctl)
-{
- struct smb_sb_info *server = server_from_dentry(filp->f_path.dentry);
-
- if (smb_proc_ops_wait(server) < 0)
- return -EIO;
-
- return server->ops->readdir(filp, dirent, filldir, ctl);
-}
-
-int
-smb_proc_getattr(struct dentry *dir, struct smb_fattr *fattr)
-{
- struct smb_sb_info *server = server_from_dentry(dir);
- int result;
-
- smb_init_dirent(server, fattr);
- result = server->ops->getattr(server, dir, fattr);
- smb_finish_dirent(server, fattr);
-
- return result;
-}
-
-
-/*
- * Because of bugs in the core protocol, we use this only to set
- * attributes. See smb_proc_settime() below for timestamp handling.
- *
- * Bugs Noted:
- * (1) If mtime is non-zero, both Win 3.1 and Win 95 fail
- * with an undocumented error (ERRDOS code 50). Setting
- * mtime to 0 allows the attributes to be set.
- * (2) The extra parameters following the name string aren't
- * in the CIFS docs, but seem to be necessary for operation.
- */
-static int
-smb_proc_setattr_core(struct smb_sb_info *server, struct dentry *dentry,
- __u16 attr)
-{
- char *p;
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- p = smb_setup_header(req, SMBsetatr, 8, 0);
- WSET(req->rq_header, smb_vwv0, attr);
- DSET(req->rq_header, smb_vwv1, 0); /* mtime */
- WSET(req->rq_header, smb_vwv3, 0); /* reserved values */
- WSET(req->rq_header, smb_vwv4, 0);
- WSET(req->rq_header, smb_vwv5, 0);
- WSET(req->rq_header, smb_vwv6, 0);
- WSET(req->rq_header, smb_vwv7, 0);
- result = smb_simple_encode_path(req, &p, dentry, NULL);
- if (result < 0)
- goto out_free;
- if (p + 2 > (char *)req->rq_buffer + req->rq_bufsize) {
- result = -ENAMETOOLONG;
- goto out_free;
- }
- *p++ = 4;
- *p++ = 0;
- smb_setup_bcc(req, p);
-
- result = smb_request_ok(req, SMBsetatr, 0, 0);
- if (result < 0)
- goto out_free;
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-/*
- * Because of bugs in the trans2 setattr messages, we must set
- * attributes and timestamps separately. The core SMBsetatr
- * message seems to be the only reliable way to set attributes.
- */
-int
-smb_proc_setattr(struct dentry *dir, struct smb_fattr *fattr)
-{
- struct smb_sb_info *server = server_from_dentry(dir);
- int result;
-
- VERBOSE("setting %s/%s, open=%d\n",
- DENTRY_PATH(dir), smb_is_open(dir->d_inode));
- result = smb_proc_setattr_core(server, dir, fattr->attr);
- return result;
-}
-
-/*
- * Sets the timestamps for an file open with write permissions.
- */
-static int
-smb_proc_setattr_ext(struct smb_sb_info *server,
- struct inode *inode, struct smb_fattr *fattr)
-{
- __u16 date, time;
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 0)))
- goto out;
-
- smb_setup_header(req, SMBsetattrE, 7, 0);
- WSET(req->rq_header, smb_vwv0, SMB_I(inode)->fileid);
- /* We don't change the creation time */
- WSET(req->rq_header, smb_vwv1, 0);
- WSET(req->rq_header, smb_vwv2, 0);
- date_unix2dos(server, fattr->f_atime.tv_sec, &date, &time);
- WSET(req->rq_header, smb_vwv3, date);
- WSET(req->rq_header, smb_vwv4, time);
- date_unix2dos(server, fattr->f_mtime.tv_sec, &date, &time);
- WSET(req->rq_header, smb_vwv5, date);
- WSET(req->rq_header, smb_vwv6, time);
-#ifdef SMBFS_DEBUG_TIMESTAMP
- printk(KERN_DEBUG "smb_proc_setattr_ext: date=%d, time=%d, mtime=%ld\n",
- date, time, fattr->f_mtime);
-#endif
-
- req->rq_flags |= SMB_REQ_NORETRY;
- result = smb_request_ok(req, SMBsetattrE, 0, 0);
- if (result < 0)
- goto out_free;
- result = 0;
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-/*
- * Bugs Noted:
- * (1) The TRANSACT2_SETPATHINFO message under Win NT 4.0 doesn't
- * set the file's attribute flags.
- */
-static int
-smb_proc_setattr_trans2(struct smb_sb_info *server,
- struct dentry *dir, struct smb_fattr *fattr)
-{
- __u16 date, time;
- char *p, *param;
- int result;
- char data[26];
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
- param = req->rq_buffer;
-
- WSET(param, 0, 1); /* Info level SMB_INFO_STANDARD */
- DSET(param, 2, 0);
- result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, dir, NULL);
- if (result < 0)
- goto out_free;
- p = param + 6 + result;
-
- WSET(data, 0, 0); /* creation time */
- WSET(data, 2, 0);
- date_unix2dos(server, fattr->f_atime.tv_sec, &date, &time);
- WSET(data, 4, date);
- WSET(data, 6, time);
- date_unix2dos(server, fattr->f_mtime.tv_sec, &date, &time);
- WSET(data, 8, date);
- WSET(data, 10, time);
-#ifdef SMBFS_DEBUG_TIMESTAMP
- printk(KERN_DEBUG "setattr_trans2: %s/%s, date=%x, time=%x, mtime=%ld\n",
- DENTRY_PATH(dir), date, time, fattr->f_mtime);
-#endif
- DSET(data, 12, 0); /* size */
- DSET(data, 16, 0); /* blksize */
- WSET(data, 20, 0); /* attr */
- DSET(data, 22, 0); /* ULONG EA size */
-
- req->rq_trans2_command = TRANSACT2_SETPATHINFO;
- req->rq_ldata = 26;
- req->rq_data = data;
- req->rq_lparm = p - param;
- req->rq_parm = param;
- req->rq_flags = 0;
- result = smb_add_request(req);
- if (result < 0)
- goto out_free;
- result = 0;
- if (req->rq_rcls != 0)
- result = smb_errno(req);
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-/*
- * ATTR_MODE 0x001
- * ATTR_UID 0x002
- * ATTR_GID 0x004
- * ATTR_SIZE 0x008
- * ATTR_ATIME 0x010
- * ATTR_MTIME 0x020
- * ATTR_CTIME 0x040
- * ATTR_ATIME_SET 0x080
- * ATTR_MTIME_SET 0x100
- * ATTR_FORCE 0x200
- * ATTR_ATTR_FLAG 0x400
- *
- * major/minor should only be set by mknod.
- */
-int
-smb_proc_setattr_unix(struct dentry *d, struct iattr *attr,
- unsigned int major, unsigned int minor)
-{
- struct smb_sb_info *server = server_from_dentry(d);
- u64 nttime;
- char *p, *param;
- int result;
- char data[100];
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
- param = req->rq_buffer;
-
- DEBUG1("valid flags = 0x%04x\n", attr->ia_valid);
-
- WSET(param, 0, SMB_SET_FILE_UNIX_BASIC);
- DSET(param, 2, 0);
- result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, d, NULL);
- if (result < 0)
- goto out_free;
- p = param + 6 + result;
-
- /* 0 L file size in bytes */
- /* 8 L file size on disk in bytes (block count) */
- /* 40 L uid */
- /* 48 L gid */
- /* 56 W file type enum */
- /* 60 L devmajor */
- /* 68 L devminor */
- /* 76 L unique ID (inode) */
- /* 84 L permissions */
- /* 92 L link count */
- LSET(data, 0, SMB_SIZE_NO_CHANGE);
- LSET(data, 8, SMB_SIZE_NO_CHANGE);
- LSET(data, 16, SMB_TIME_NO_CHANGE);
- LSET(data, 24, SMB_TIME_NO_CHANGE);
- LSET(data, 32, SMB_TIME_NO_CHANGE);
- LSET(data, 40, SMB_UID_NO_CHANGE);
- LSET(data, 48, SMB_GID_NO_CHANGE);
- DSET(data, 56, smb_filetype_from_mode(attr->ia_mode));
- LSET(data, 60, major);
- LSET(data, 68, minor);
- LSET(data, 76, 0);
- LSET(data, 84, SMB_MODE_NO_CHANGE);
- LSET(data, 92, 0);
-
- if (attr->ia_valid & ATTR_SIZE) {
- LSET(data, 0, attr->ia_size);
- LSET(data, 8, 0); /* can't set anyway */
- }
-
- /*
- * FIXME: check the conversion function it the correct one
- *
- * we can't set ctime but we might as well pass this to the server
- * and let it ignore it.
- */
- if (attr->ia_valid & ATTR_CTIME) {
- nttime = smb_unixutc2ntutc(attr->ia_ctime);
- LSET(data, 16, nttime);
- }
- if (attr->ia_valid & ATTR_ATIME) {
- nttime = smb_unixutc2ntutc(attr->ia_atime);
- LSET(data, 24, nttime);
- }
- if (attr->ia_valid & ATTR_MTIME) {
- nttime = smb_unixutc2ntutc(attr->ia_mtime);
- LSET(data, 32, nttime);
- }
-
- if (attr->ia_valid & ATTR_UID) {
- LSET(data, 40, attr->ia_uid);
- }
- if (attr->ia_valid & ATTR_GID) {
- LSET(data, 48, attr->ia_gid);
- }
-
- if (attr->ia_valid & ATTR_MODE) {
- LSET(data, 84, attr->ia_mode);
- }
-
- req->rq_trans2_command = TRANSACT2_SETPATHINFO;
- req->rq_ldata = 100;
- req->rq_data = data;
- req->rq_lparm = p - param;
- req->rq_parm = param;
- req->rq_flags = 0;
- result = smb_add_request(req);
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-
-/*
- * Set the modify and access timestamps for a file.
- *
- * Incredibly enough, in all of SMB there is no message to allow
- * setting both attributes and timestamps at once.
- *
- * Bugs Noted:
- * (1) Win 95 doesn't support the TRANSACT2_SETFILEINFO message
- * with info level 1 (INFO_STANDARD).
- * (2) Win 95 seems not to support setting directory timestamps.
- * (3) Under the core protocol apparently the only way to set the
- * timestamp is to open and close the file.
- */
-int
-smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
- struct inode *inode = dentry->d_inode;
- int result;
-
- VERBOSE("setting %s/%s, open=%d\n",
- DENTRY_PATH(dentry), smb_is_open(inode));
-
- /* setting the time on a Win95 server fails (tridge) */
- if (server->opt.protocol >= SMB_PROTOCOL_LANMAN2 &&
- !(server->mnt->flags & SMB_MOUNT_WIN95)) {
- if (smb_is_open(inode) && SMB_I(inode)->access != SMB_O_RDONLY)
- result = smb_proc_setattr_ext(server, inode, fattr);
- else
- result = smb_proc_setattr_trans2(server, dentry, fattr);
- } else {
- /*
- * Fail silently on directories ... timestamp can't be set?
- */
- result = 0;
- if (S_ISREG(inode->i_mode)) {
- /*
- * Set the mtime by opening and closing the file.
- * Note that the file is opened read-only, but this
- * still allows us to set the date (tridge)
- */
- result = -EACCES;
- if (!smb_is_open(inode))
- smb_proc_open(server, dentry, SMB_O_RDONLY);
- if (smb_is_open(inode)) {
- inode->i_mtime = fattr->f_mtime;
- result = smb_proc_close_inode(server, inode);
- }
- }
- }
-
- return result;
-}
-
-int
-smb_proc_dskattr(struct dentry *dentry, struct kstatfs *attr)
-{
- struct smb_sb_info *server = SMB_SB(dentry->d_sb);
- int result;
- char *p;
- long unit;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 0)))
- goto out;
-
- smb_setup_header(req, SMBdskattr, 0, 0);
- if ((result = smb_request_ok(req, SMBdskattr, 5, 0)) < 0)
- goto out_free;
- p = SMB_VWV(req->rq_header);
- unit = (WVAL(p, 2) * WVAL(p, 4)) >> SMB_ST_BLKSHIFT;
- attr->f_blocks = WVAL(p, 0) * unit;
- attr->f_bsize = SMB_ST_BLKSIZE;
- attr->f_bavail = attr->f_bfree = WVAL(p, 6) * unit;
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-int
-smb_proc_read_link(struct smb_sb_info *server, struct dentry *d,
- char *buffer, int len)
-{
- char *p, *param;
- int result;
- struct smb_request *req;
-
- DEBUG1("readlink of %s/%s\n", DENTRY_PATH(d));
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
- param = req->rq_buffer;
-
- WSET(param, 0, SMB_QUERY_FILE_UNIX_LINK);
- DSET(param, 2, 0);
- result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, d, NULL);
- if (result < 0)
- goto out_free;
- p = param + 6 + result;
-
- req->rq_trans2_command = TRANSACT2_QPATHINFO;
- req->rq_ldata = 0;
- req->rq_data = NULL;
- req->rq_lparm = p - param;
- req->rq_parm = param;
- req->rq_flags = 0;
- result = smb_add_request(req);
- if (result < 0)
- goto out_free;
- DEBUG1("for %s: result=%d, rcls=%d, err=%d\n",
- &param[6], result, req->rq_rcls, req->rq_err);
-
- /* copy data up to the \0 or buffer length */
- result = len;
- if (req->rq_ldata < len)
- result = req->rq_ldata;
- strncpy(buffer, req->rq_data, result);
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-
-/*
- * Create a symlink object called dentry which points to oldpath.
- * Samba does not permit dangling links but returns a suitable error message.
- */
-int
-smb_proc_symlink(struct smb_sb_info *server, struct dentry *d,
- const char *oldpath)
-{
- char *p, *param;
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
- param = req->rq_buffer;
-
- WSET(param, 0, SMB_SET_FILE_UNIX_LINK);
- DSET(param, 2, 0);
- result = smb_encode_path(server, param + 6, SMB_MAXPATHLEN+1, d, NULL);
- if (result < 0)
- goto out_free;
- p = param + 6 + result;
-
- req->rq_trans2_command = TRANSACT2_SETPATHINFO;
- req->rq_ldata = strlen(oldpath) + 1;
- req->rq_data = (char *) oldpath;
- req->rq_lparm = p - param;
- req->rq_parm = param;
- req->rq_flags = 0;
- result = smb_add_request(req);
- if (result < 0)
- goto out_free;
-
- DEBUG1("for %s: result=%d, rcls=%d, err=%d\n",
- &param[6], result, req->rq_rcls, req->rq_err);
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-/*
- * Create a hard link object called new_dentry which points to dentry.
- */
-int
-smb_proc_link(struct smb_sb_info *server, struct dentry *dentry,
- struct dentry *new_dentry)
-{
- char *p, *param;
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
- param = req->rq_buffer;
-
- WSET(param, 0, SMB_SET_FILE_UNIX_HLINK);
- DSET(param, 2, 0);
- result = smb_encode_path(server, param + 6, SMB_MAXPATHLEN+1,
- new_dentry, NULL);
- if (result < 0)
- goto out_free;
- p = param + 6 + result;
-
- /* Grr, pointless separation of parameters and data ... */
- req->rq_data = p;
- req->rq_ldata = smb_encode_path(server, p, SMB_MAXPATHLEN+1,
- dentry, NULL);
-
- req->rq_trans2_command = TRANSACT2_SETPATHINFO;
- req->rq_lparm = p - param;
- req->rq_parm = param;
- req->rq_flags = 0;
- result = smb_add_request(req);
- if (result < 0)
- goto out_free;
-
- DEBUG1("for %s: result=%d, rcls=%d, err=%d\n",
- &param[6], result, req->rq_rcls, req->rq_err);
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-static int
-smb_proc_query_cifsunix(struct smb_sb_info *server)
-{
- int result;
- int major, minor;
- u64 caps;
- char param[2];
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 100)))
- goto out;
-
- WSET(param, 0, SMB_QUERY_CIFS_UNIX_INFO);
-
- req->rq_trans2_command = TRANSACT2_QFSINFO;
- req->rq_ldata = 0;
- req->rq_data = NULL;
- req->rq_lparm = 2;
- req->rq_parm = param;
- req->rq_flags = 0;
- result = smb_add_request(req);
- if (result < 0)
- goto out_free;
-
- if (req->rq_ldata < 12) {
- PARANOIA("Not enough data\n");
- goto out_free;
- }
- major = WVAL(req->rq_data, 0);
- minor = WVAL(req->rq_data, 2);
-
- DEBUG1("Server implements CIFS Extensions for UNIX systems v%d.%d\n",
- major, minor);
- /* FIXME: verify that we are ok with this major/minor? */
-
- caps = LVAL(req->rq_data, 4);
- DEBUG1("Server capabilities 0x%016llx\n", caps);
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-
-static void
-install_ops(struct smb_ops *dst, struct smb_ops *src)
-{
- memcpy(dst, src, sizeof(void *) * SMB_OPS_NUM_STATIC);
-}
-
-/* < LANMAN2 */
-static struct smb_ops smb_ops_core =
-{
- .read = smb_proc_read,
- .write = smb_proc_write,
- .readdir = smb_proc_readdir_short,
- .getattr = smb_proc_getattr_core,
- .truncate = smb_proc_trunc32,
-};
-
-/* LANMAN2, OS/2, others? */
-static struct smb_ops smb_ops_os2 =
-{
- .read = smb_proc_read,
- .write = smb_proc_write,
- .readdir = smb_proc_readdir_long,
- .getattr = smb_proc_getattr_trans2_std,
- .truncate = smb_proc_trunc32,
-};
-
-/* Win95, and possibly some NetApp versions too */
-static struct smb_ops smb_ops_win95 =
-{
- .read = smb_proc_read, /* does not support 12word readX */
- .write = smb_proc_write,
- .readdir = smb_proc_readdir_long,
- .getattr = smb_proc_getattr_95,
- .truncate = smb_proc_trunc95,
-};
-
-/* Samba, NT4 and NT5 */
-static struct smb_ops smb_ops_winNT =
-{
- .read = smb_proc_readX,
- .write = smb_proc_writeX,
- .readdir = smb_proc_readdir_long,
- .getattr = smb_proc_getattr_trans2_all,
- .truncate = smb_proc_trunc64,
-};
-
-/* Samba w/ unix extensions. Others? */
-static struct smb_ops smb_ops_unix =
-{
- .read = smb_proc_readX,
- .write = smb_proc_writeX,
- .readdir = smb_proc_readdir_long,
- .getattr = smb_proc_getattr_unix,
- /* FIXME: core/ext/time setattr needs to be cleaned up! */
- /* .setattr = smb_proc_setattr_unix, */
- .truncate = smb_proc_trunc64,
-};
-
-/* Place holder until real ops are in place */
-static struct smb_ops smb_ops_null =
-{
- .readdir = smb_proc_readdir_null,
- .getattr = smb_proc_getattr_null,
-};
-
-void smb_install_null_ops(struct smb_ops *ops)
-{
- install_ops(ops, &smb_ops_null);
-}
diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h
deleted file mode 100644
index 05939a6..0000000
--- a/fs/smbfs/proto.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Autogenerated with cproto on: Sat Sep 13 17:18:51 CEST 2003
- */
-
-struct smb_request;
-struct sock;
-struct statfs;
-
-/* proc.c */
-extern int smb_setcodepage(struct smb_sb_info *server, struct smb_nls_codepage *cp);
-extern __u32 smb_len(__u8 *p);
-extern int smb_get_rsize(struct smb_sb_info *server);
-extern int smb_get_wsize(struct smb_sb_info *server);
-extern int smb_errno(struct smb_request *req);
-extern int smb_newconn(struct smb_sb_info *server, struct smb_conn_opt *opt);
-extern __u8 *smb_setup_header(struct smb_request *req, __u8 command, __u16 wct, __u16 bcc);
-extern int smb_open(struct dentry *dentry, int wish);
-extern int smb_close(struct inode *ino);
-extern int smb_close_fileid(struct dentry *dentry, __u16 fileid);
-extern int smb_proc_create(struct dentry *dentry, __u16 attr, time_t ctime, __u16 *fileid);
-extern int smb_proc_mv(struct dentry *old_dentry, struct dentry *new_dentry);
-extern int smb_proc_mkdir(struct dentry *dentry);
-extern int smb_proc_rmdir(struct dentry *dentry);
-extern int smb_proc_unlink(struct dentry *dentry);
-extern int smb_proc_flush(struct smb_sb_info *server, __u16 fileid);
-extern void smb_init_root_dirent(struct smb_sb_info *server, struct smb_fattr *fattr,
- struct super_block *sb);
-extern int smb_proc_getattr(struct dentry *dir, struct smb_fattr *fattr);
-extern int smb_proc_setattr(struct dentry *dir, struct smb_fattr *fattr);
-extern int smb_proc_setattr_unix(struct dentry *d, struct iattr *attr, unsigned int major, unsigned int minor);
-extern int smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr);
-extern int smb_proc_dskattr(struct dentry *dentry, struct kstatfs *attr);
-extern int smb_proc_read_link(struct smb_sb_info *server, struct dentry *d, char *buffer, int len);
-extern int smb_proc_symlink(struct smb_sb_info *server, struct dentry *d, const char *oldpath);
-extern int smb_proc_link(struct smb_sb_info *server, struct dentry *dentry, struct dentry *new_dentry);
-extern void smb_install_null_ops(struct smb_ops *ops);
-/* dir.c */
-extern const struct file_operations smb_dir_operations;
-extern const struct inode_operations smb_dir_inode_operations;
-extern const struct inode_operations smb_dir_inode_operations_unix;
-extern void smb_new_dentry(struct dentry *dentry);
-extern void smb_renew_times(struct dentry *dentry);
-/* cache.c */
-extern void smb_invalid_dir_cache(struct inode *dir);
-extern void smb_invalidate_dircache_entries(struct dentry *parent);
-extern struct dentry *smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos);
-extern int smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir, struct smb_cache_control *ctrl, struct qstr *qname, struct smb_fattr *entry);
-/* sock.c */
-extern void smb_data_ready(struct sock *sk, int len);
-extern int smb_valid_socket(struct inode *inode);
-extern void smb_close_socket(struct smb_sb_info *server);
-extern int smb_recv_available(struct smb_sb_info *server);
-extern int smb_receive_header(struct smb_sb_info *server);
-extern int smb_receive_drop(struct smb_sb_info *server);
-extern int smb_receive(struct smb_sb_info *server, struct smb_request *req);
-extern int smb_send_request(struct smb_request *req);
-/* inode.c */
-extern struct inode *smb_iget(struct super_block *sb, struct smb_fattr *fattr);
-extern void smb_get_inode_attr(struct inode *inode, struct smb_fattr *fattr);
-extern void smb_set_inode_attr(struct inode *inode, struct smb_fattr *fattr);
-extern void smb_invalidate_inodes(struct smb_sb_info *server);
-extern int smb_revalidate_inode(struct dentry *dentry);
-extern int smb_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
-extern int smb_notify_change(struct dentry *dentry, struct iattr *attr);
-/* file.c */
-extern const struct address_space_operations smb_file_aops;
-extern const struct file_operations smb_file_operations;
-extern const struct inode_operations smb_file_inode_operations;
-/* ioctl.c */
-extern long smb_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
-/* smbiod.c */
-extern void smbiod_wake_up(void);
-extern int smbiod_register_server(struct smb_sb_info *server);
-extern void smbiod_unregister_server(struct smb_sb_info *server);
-extern void smbiod_flush(struct smb_sb_info *server);
-extern int smbiod_retry(struct smb_sb_info *server);
-/* request.c */
-extern int smb_init_request_cache(void);
-extern void smb_destroy_request_cache(void);
-extern struct smb_request *smb_alloc_request(struct smb_sb_info *server, int bufsize);
-extern void smb_rput(struct smb_request *req);
-extern int smb_add_request(struct smb_request *req);
-extern int smb_request_send_server(struct smb_sb_info *server);
-extern int smb_request_recv(struct smb_sb_info *server);
-/* symlink.c */
-extern int smb_symlink(struct inode *inode, struct dentry *dentry, const char *oldname);
-extern const struct inode_operations smb_link_inode_operations;
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
deleted file mode 100644
index 45f4593..0000000
--- a/fs/smbfs/request.c
+++ /dev/null
@@ -1,818 +0,0 @@
-/*
- * request.c
- *
- * Copyright (C) 2001 by Urban Widmark
- *
- * Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/net.h>
-#include <linux/sched.h>
-
-#include <linux/smb_fs.h>
-#include <linux/smbno.h>
-#include <linux/smb_mount.h>
-
-#include "smb_debug.h"
-#include "request.h"
-#include "proto.h"
-
-/* #define SMB_SLAB_DEBUG (SLAB_RED_ZONE | SLAB_POISON) */
-#define SMB_SLAB_DEBUG 0
-
-/* cache for request structures */
-static struct kmem_cache *req_cachep;
-
-static int smb_request_send_req(struct smb_request *req);
-
-/*
- /proc/slabinfo:
- name, active, num, objsize, active_slabs, num_slaps, #pages
-*/
-
-
-int smb_init_request_cache(void)
-{
- req_cachep = kmem_cache_create("smb_request",
- sizeof(struct smb_request), 0,
- SMB_SLAB_DEBUG | SLAB_HWCACHE_ALIGN,
- NULL);
- if (req_cachep == NULL)
- return -ENOMEM;
-
- return 0;
-}
-
-void smb_destroy_request_cache(void)
-{
- kmem_cache_destroy(req_cachep);
-}
-
-/*
- * Allocate and initialise a request structure
- */
-static struct smb_request *smb_do_alloc_request(struct smb_sb_info *server,
- int bufsize)
-{
- struct smb_request *req;
- unsigned char *buf = NULL;
-
- req = kmem_cache_zalloc(req_cachep, GFP_KERNEL);
- VERBOSE("allocating request: %p\n", req);
- if (!req)
- goto out;
-
- if (bufsize > 0) {
- buf = kmalloc(bufsize, GFP_NOFS);
- if (!buf) {
- kmem_cache_free(req_cachep, req);
- return NULL;
- }
- }
-
- req->rq_buffer = buf;
- req->rq_bufsize = bufsize;
- req->rq_server = server;
- init_waitqueue_head(&req->rq_wait);
- INIT_LIST_HEAD(&req->rq_queue);
- atomic_set(&req->rq_count, 1);
-
-out:
- return req;
-}
-
-struct smb_request *smb_alloc_request(struct smb_sb_info *server, int bufsize)
-{
- struct smb_request *req = NULL;
-
- for (;;) {
- atomic_inc(&server->nr_requests);
- if (atomic_read(&server->nr_requests) <= MAX_REQUEST_HARD) {
- req = smb_do_alloc_request(server, bufsize);
- if (req != NULL)
- break;
- }
-
-#if 0
- /*
- * Try to free up at least one request in order to stay
- * below the hard limit
- */
- if (nfs_try_to_free_pages(server))
- continue;
-
- if (fatal_signal_pending(current))
- return ERR_PTR(-ERESTARTSYS);
- current->policy = SCHED_YIELD;
- schedule();
-#else
- /* FIXME: we want something like nfs does above, but that
- requires changes to all callers and can wait. */
- break;
-#endif
- }
- return req;
-}
-
-static void smb_free_request(struct smb_request *req)
-{
- atomic_dec(&req->rq_server->nr_requests);
- if (req->rq_buffer && !(req->rq_flags & SMB_REQ_STATIC))
- kfree(req->rq_buffer);
- kfree(req->rq_trans2buffer);
- kmem_cache_free(req_cachep, req);
-}
-
-/*
- * What prevents a rget to race with a rput? The count must never drop to zero
- * while it is in use. Only rput if it is ok that it is free'd.
- */
-static void smb_rget(struct smb_request *req)
-{
- atomic_inc(&req->rq_count);
-}
-void smb_rput(struct smb_request *req)
-{
- if (atomic_dec_and_test(&req->rq_count)) {
- list_del_init(&req->rq_queue);
- smb_free_request(req);
- }
-}
-
-/* setup to receive the data part of the SMB */
-static int smb_setup_bcc(struct smb_request *req)
-{
- int result = 0;
- req->rq_rlen = smb_len(req->rq_header) + 4 - req->rq_bytes_recvd;
-
- if (req->rq_rlen > req->rq_bufsize) {
- PARANOIA("Packet too large %d > %d\n",
- req->rq_rlen, req->rq_bufsize);
- return -ENOBUFS;
- }
-
- req->rq_iov[0].iov_base = req->rq_buffer;
- req->rq_iov[0].iov_len = req->rq_rlen;
- req->rq_iovlen = 1;
-
- return result;
-}
-
-/*
- * Prepare a "normal" request structure.
- */
-static int smb_setup_request(struct smb_request *req)
-{
- int len = smb_len(req->rq_header) + 4;
- req->rq_slen = len;
-
- /* if we expect a data part in the reply we set the iov's to read it */
- if (req->rq_resp_bcc)
- req->rq_setup_read = smb_setup_bcc;
-
- /* This tries to support re-using the same request */
- req->rq_bytes_sent = 0;
- req->rq_rcls = 0;
- req->rq_err = 0;
- req->rq_errno = 0;
- req->rq_fragment = 0;
- kfree(req->rq_trans2buffer);
- req->rq_trans2buffer = NULL;
-
- return 0;
-}
-
-/*
- * Prepare a transaction2 request structure
- */
-static int smb_setup_trans2request(struct smb_request *req)
-{
- struct smb_sb_info *server = req->rq_server;
- int mparam, mdata;
- static unsigned char padding[4];
-
- /* I know the following is very ugly, but I want to build the
- smb packet as efficiently as possible. */
-
- const int smb_parameters = 15;
- const int header = SMB_HEADER_LEN + 2 * smb_parameters + 2;
- const int oparam = ALIGN(header + 3, sizeof(u32));
- const int odata = ALIGN(oparam + req->rq_lparm, sizeof(u32));
- const int bcc = (req->rq_data ? odata + req->rq_ldata :
- oparam + req->rq_lparm) - header;
-
- if ((bcc + oparam) > server->opt.max_xmit)
- return -ENOMEM;
- smb_setup_header(req, SMBtrans2, smb_parameters, bcc);
-
- /*
- * max parameters + max data + max setup == bufsize to make NT4 happy
- * and not abort the transfer or split into multiple responses. It also
- * makes smbfs happy as handling packets larger than the buffer size
- * is extra work.
- *
- * OS/2 is probably going to hate me for this ...
- */
- mparam = SMB_TRANS2_MAX_PARAM;
- mdata = req->rq_bufsize - mparam;
-
- mdata = server->opt.max_xmit - mparam - 100;
- if (mdata < 1024) {
- mdata = 1024;
- mparam = 20;
- }
-
-#if 0
- /* NT/win2k has ~4k max_xmit, so with this we request more than it wants
- to return as one SMB. Useful for testing the fragmented trans2
- handling. */
- mdata = 8192;
-#endif
-
- WSET(req->rq_header, smb_tpscnt, req->rq_lparm);
- WSET(req->rq_header, smb_tdscnt, req->rq_ldata);
- WSET(req->rq_header, smb_mprcnt, mparam);
- WSET(req->rq_header, smb_mdrcnt, mdata);
- WSET(req->rq_header, smb_msrcnt, 0); /* max setup always 0 ? */
- WSET(req->rq_header, smb_flags, 0);
- DSET(req->rq_header, smb_timeout, 0);
- WSET(req->rq_header, smb_pscnt, req->rq_lparm);
- WSET(req->rq_header, smb_psoff, oparam - 4);
- WSET(req->rq_header, smb_dscnt, req->rq_ldata);
- WSET(req->rq_header, smb_dsoff, req->rq_data ? odata - 4 : 0);
- *(req->rq_header + smb_suwcnt) = 0x01; /* setup count */
- *(req->rq_header + smb_suwcnt + 1) = 0x00; /* reserved */
- WSET(req->rq_header, smb_setup0, req->rq_trans2_command);
-
- req->rq_iovlen = 2;
- req->rq_iov[0].iov_base = (void *) req->rq_header;
- req->rq_iov[0].iov_len = oparam;
- req->rq_iov[1].iov_base = (req->rq_parm==NULL) ? padding : req->rq_parm;
- req->rq_iov[1].iov_len = req->rq_lparm;
- req->rq_slen = oparam + req->rq_lparm;
-
- if (req->rq_data) {
- req->rq_iovlen += 2;
- req->rq_iov[2].iov_base = padding;
- req->rq_iov[2].iov_len = odata - oparam - req->rq_lparm;
- req->rq_iov[3].iov_base = req->rq_data;
- req->rq_iov[3].iov_len = req->rq_ldata;
- req->rq_slen = odata + req->rq_ldata;
- }
-
- /* always a data part for trans2 replies */
- req->rq_setup_read = smb_setup_bcc;
-
- return 0;
-}
-
-/*
- * Add a request and tell smbiod to process it
- */
-int smb_add_request(struct smb_request *req)
-{
- long timeleft;
- struct smb_sb_info *server = req->rq_server;
- int result = 0;
-
- smb_setup_request(req);
- if (req->rq_trans2_command) {
- if (req->rq_buffer == NULL) {
- PARANOIA("trans2 attempted without response buffer!\n");
- return -EIO;
- }
- result = smb_setup_trans2request(req);
- }
- if (result < 0)
- return result;
-
-#ifdef SMB_DEBUG_PACKET_SIZE
- add_xmit_stats(req);
-#endif
-
- /* add 'req' to the queue of requests */
- if (smb_lock_server_interruptible(server))
- return -EINTR;
-
- /*
- * Try to send the request as the process. If that fails we queue the
- * request and let smbiod send it later.
- */
-
- /* FIXME: each server has a number on the maximum number of parallel
- requests. 10, 50 or so. We should not allow more requests to be
- active. */
- if (server->mid > 0xf000)
- server->mid = 0;
- req->rq_mid = server->mid++;
- WSET(req->rq_header, smb_mid, req->rq_mid);
-
- result = 0;
- if (server->state == CONN_VALID) {
- if (list_empty(&server->xmitq))
- result = smb_request_send_req(req);
- if (result < 0) {
- /* Connection lost? */
- server->conn_error = result;
- server->state = CONN_INVALID;
- }
- }
- if (result != 1)
- list_add_tail(&req->rq_queue, &server->xmitq);
- smb_rget(req);
-
- if (server->state != CONN_VALID)
- smbiod_retry(server);
-
- smb_unlock_server(server);
-
- smbiod_wake_up();
-
- timeleft = wait_event_interruptible_timeout(req->rq_wait,
- req->rq_flags & SMB_REQ_RECEIVED, 30*HZ);
- if (!timeleft || signal_pending(current)) {
- /*
- * On timeout or on interrupt we want to try and remove the
- * request from the recvq/xmitq.
- * First check if the request is still part of a queue. (May
- * have been removed by some error condition)
- */
- smb_lock_server(server);
- if (!list_empty(&req->rq_queue)) {
- list_del_init(&req->rq_queue);
- smb_rput(req);
- }
- smb_unlock_server(server);
- }
-
- if (!timeleft) {
- PARANOIA("request [%p, mid=%d] timed out!\n",
- req, req->rq_mid);
- VERBOSE("smb_com: %02x\n", *(req->rq_header + smb_com));
- VERBOSE("smb_rcls: %02x\n", *(req->rq_header + smb_rcls));
- VERBOSE("smb_flg: %02x\n", *(req->rq_header + smb_flg));
- VERBOSE("smb_tid: %04x\n", WVAL(req->rq_header, smb_tid));
- VERBOSE("smb_pid: %04x\n", WVAL(req->rq_header, smb_pid));
- VERBOSE("smb_uid: %04x\n", WVAL(req->rq_header, smb_uid));
- VERBOSE("smb_mid: %04x\n", WVAL(req->rq_header, smb_mid));
- VERBOSE("smb_wct: %02x\n", *(req->rq_header + smb_wct));
-
- req->rq_rcls = ERRSRV;
- req->rq_err = ERRtimeout;
-
- /* Just in case it was "stuck" */
- smbiod_wake_up();
- }
- VERBOSE("woke up, rcls=%d\n", req->rq_rcls);
-
- if (req->rq_rcls != 0)
- req->rq_errno = smb_errno(req);
- if (signal_pending(current))
- req->rq_errno = -ERESTARTSYS;
- return req->rq_errno;
-}
-
-/*
- * Send a request and place it on the recvq if successfully sent.
- * Must be called with the server lock held.
- */
-static int smb_request_send_req(struct smb_request *req)
-{
- struct smb_sb_info *server = req->rq_server;
- int result;
-
- if (req->rq_bytes_sent == 0) {
- WSET(req->rq_header, smb_tid, server->opt.tid);
- WSET(req->rq_header, smb_pid, 1);
- WSET(req->rq_header, smb_uid, server->opt.server_uid);
- }
-
- result = smb_send_request(req);
- if (result < 0 && result != -EAGAIN)
- goto out;
-
- result = 0;
- if (!(req->rq_flags & SMB_REQ_TRANSMITTED))
- goto out;
-
- list_move_tail(&req->rq_queue, &server->recvq);
- result = 1;
-out:
- return result;
-}
-
-/*
- * Sends one request for this server. (smbiod)
- * Must be called with the server lock held.
- * Returns: <0 on error
- * 0 if no request could be completely sent
- * 1 if all data for one request was sent
- */
-int smb_request_send_server(struct smb_sb_info *server)
-{
- struct list_head *head;
- struct smb_request *req;
- int result;
-
- if (server->state != CONN_VALID)
- return 0;
-
- /* dequeue first request, if any */
- req = NULL;
- head = server->xmitq.next;
- if (head != &server->xmitq) {
- req = list_entry(head, struct smb_request, rq_queue);
- }
- if (!req)
- return 0;
-
- result = smb_request_send_req(req);
- if (result < 0) {
- server->conn_error = result;
- list_move(&req->rq_queue, &server->xmitq);
- result = -EIO;
- goto out;
- }
-
-out:
- return result;
-}
-
-/*
- * Try to find a request matching this "mid". Typically the first entry will
- * be the matching one.
- */
-static struct smb_request *find_request(struct smb_sb_info *server, int mid)
-{
- struct list_head *tmp;
- struct smb_request *req = NULL;
-
- list_for_each(tmp, &server->recvq) {
- req = list_entry(tmp, struct smb_request, rq_queue);
- if (req->rq_mid == mid) {
- break;
- }
- req = NULL;
- }
-
- if (!req) {
- VERBOSE("received reply with mid %d but no request!\n",
- WVAL(server->header, smb_mid));
- server->rstate = SMB_RECV_DROP;
- }
-
- return req;
-}
-
-/*
- * Called when we have read the smb header and believe this is a response.
- */
-static int smb_init_request(struct smb_sb_info *server, struct smb_request *req)
-{
- int hdrlen, wct;
-
- memcpy(req->rq_header, server->header, SMB_HEADER_LEN);
-
- wct = *(req->rq_header + smb_wct);
- if (wct > 20) {
- PARANOIA("wct too large, %d > 20\n", wct);
- server->rstate = SMB_RECV_DROP;
- return 0;
- }
-
- req->rq_resp_wct = wct;
- hdrlen = SMB_HEADER_LEN + wct*2 + 2;
- VERBOSE("header length: %d smb_wct: %2d\n", hdrlen, wct);
-
- req->rq_bytes_recvd = SMB_HEADER_LEN;
- req->rq_rlen = hdrlen;
- req->rq_iov[0].iov_base = req->rq_header;
- req->rq_iov[0].iov_len = hdrlen;
- req->rq_iovlen = 1;
- server->rstate = SMB_RECV_PARAM;
-
-#ifdef SMB_DEBUG_PACKET_SIZE
- add_recv_stats(smb_len(server->header));
-#endif
- return 0;
-}
-
-/*
- * Reads the SMB parameters
- */
-static int smb_recv_param(struct smb_sb_info *server, struct smb_request *req)
-{
- int result;
-
- result = smb_receive(server, req);
- if (result < 0)
- return result;
- if (req->rq_bytes_recvd < req->rq_rlen)
- return 0;
-
- VERBOSE("result: %d smb_bcc: %04x\n", result,
- WVAL(req->rq_header, SMB_HEADER_LEN +
- (*(req->rq_header + smb_wct) * 2)));
-
- result = 0;
- req->rq_iov[0].iov_base = NULL;
- req->rq_rlen = 0;
- if (req->rq_callback)
- req->rq_callback(req);
- else if (req->rq_setup_read)
- result = req->rq_setup_read(req);
- if (result < 0) {
- server->rstate = SMB_RECV_DROP;
- return result;
- }
-
- server->rstate = req->rq_rlen > 0 ? SMB_RECV_DATA : SMB_RECV_END;
-
- req->rq_bytes_recvd = 0; // recvd out of the iov
-
- VERBOSE("rlen: %d\n", req->rq_rlen);
- if (req->rq_rlen < 0) {
- PARANOIA("Parameters read beyond end of packet!\n");
- server->rstate = SMB_RECV_END;
- return -EIO;
- }
- return 0;
-}
-
-/*
- * Reads the SMB data
- */
-static int smb_recv_data(struct smb_sb_info *server, struct smb_request *req)
-{
- int result;
-
- result = smb_receive(server, req);
- if (result < 0)
- goto out;
- if (req->rq_bytes_recvd < req->rq_rlen)
- goto out;
- server->rstate = SMB_RECV_END;
-out:
- VERBOSE("result: %d\n", result);
- return result;
-}
-
-/*
- * Receive a transaction2 response
- * Return: 0 if the response has been fully read
- * 1 if there are further "fragments" to read
- * <0 if there is an error
- */
-static int smb_recv_trans2(struct smb_sb_info *server, struct smb_request *req)
-{
- unsigned char *inbuf;
- unsigned int parm_disp, parm_offset, parm_count, parm_tot;
- unsigned int data_disp, data_offset, data_count, data_tot;
- int hdrlen = SMB_HEADER_LEN + req->rq_resp_wct*2 - 2;
-
- VERBOSE("handling trans2\n");
-
- inbuf = req->rq_header;
- data_tot = WVAL(inbuf, smb_tdrcnt);
- parm_tot = WVAL(inbuf, smb_tprcnt);
- parm_disp = WVAL(inbuf, smb_prdisp);
- parm_offset = WVAL(inbuf, smb_proff);
- parm_count = WVAL(inbuf, smb_prcnt);
- data_disp = WVAL(inbuf, smb_drdisp);
- data_offset = WVAL(inbuf, smb_droff);
- data_count = WVAL(inbuf, smb_drcnt);
-
- /* Modify offset for the split header/buffer we use */
- if (data_count || data_offset) {
- if (unlikely(data_offset < hdrlen))
- goto out_bad_data;
- else
- data_offset -= hdrlen;
- }
- if (parm_count || parm_offset) {
- if (unlikely(parm_offset < hdrlen))
- goto out_bad_parm;
- else
- parm_offset -= hdrlen;
- }
-
- if (parm_count == parm_tot && data_count == data_tot) {
- /*
- * This packet has all the trans2 data.
- *
- * We setup the request so that this will be the common
- * case. It may be a server error to not return a
- * response that fits.
- */
- VERBOSE("single trans2 response "
- "dcnt=%u, pcnt=%u, doff=%u, poff=%u\n",
- data_count, parm_count,
- data_offset, parm_offset);
- req->rq_ldata = data_count;
- req->rq_lparm = parm_count;
- req->rq_data = req->rq_buffer + data_offset;
- req->rq_parm = req->rq_buffer + parm_offset;
- if (unlikely(parm_offset + parm_count > req->rq_rlen))
- goto out_bad_parm;
- if (unlikely(data_offset + data_count > req->rq_rlen))
- goto out_bad_data;
- return 0;
- }
-
- VERBOSE("multi trans2 response "
- "frag=%d, dcnt=%u, pcnt=%u, doff=%u, poff=%u\n",
- req->rq_fragment,
- data_count, parm_count,
- data_offset, parm_offset);
-
- if (!req->rq_fragment) {
- int buf_len;
-
- /* We got the first trans2 fragment */
- req->rq_fragment = 1;
- req->rq_total_data = data_tot;
- req->rq_total_parm = parm_tot;
- req->rq_ldata = 0;
- req->rq_lparm = 0;
-
- buf_len = data_tot + parm_tot;
- if (buf_len > SMB_MAX_PACKET_SIZE)
- goto out_too_long;
-
- req->rq_trans2bufsize = buf_len;
- req->rq_trans2buffer = kzalloc(buf_len, GFP_NOFS);
- if (!req->rq_trans2buffer)
- goto out_no_mem;
-
- req->rq_parm = req->rq_trans2buffer;
- req->rq_data = req->rq_trans2buffer + parm_tot;
- } else if (unlikely(req->rq_total_data < data_tot ||
- req->rq_total_parm < parm_tot))
- goto out_data_grew;
-
- if (unlikely(parm_disp + parm_count > req->rq_total_parm ||
- parm_offset + parm_count > req->rq_rlen))
- goto out_bad_parm;
- if (unlikely(data_disp + data_count > req->rq_total_data ||
- data_offset + data_count > req->rq_rlen))
- goto out_bad_data;
-
- inbuf = req->rq_buffer;
- memcpy(req->rq_parm + parm_disp, inbuf + parm_offset, parm_count);
- memcpy(req->rq_data + data_disp, inbuf + data_offset, data_count);
-
- req->rq_ldata += data_count;
- req->rq_lparm += parm_count;
-
- /*
- * Check whether we've received all of the data. Note that
- * we use the packet totals -- total lengths might shrink!
- */
- if (req->rq_ldata >= data_tot && req->rq_lparm >= parm_tot) {
- req->rq_ldata = data_tot;
- req->rq_lparm = parm_tot;
- return 0;
- }
- return 1;
-
-out_too_long:
- printk(KERN_ERR "smb_trans2: data/param too long, data=%u, parm=%u\n",
- data_tot, parm_tot);
- goto out_EIO;
-out_no_mem:
- printk(KERN_ERR "smb_trans2: couldn't allocate data area of %d bytes\n",
- req->rq_trans2bufsize);
- req->rq_errno = -ENOMEM;
- goto out;
-out_data_grew:
- printk(KERN_ERR "smb_trans2: data/params grew!\n");
- goto out_EIO;
-out_bad_parm:
- printk(KERN_ERR "smb_trans2: invalid parms, disp=%u, cnt=%u, tot=%u, ofs=%u\n",
- parm_disp, parm_count, parm_tot, parm_offset);
- goto out_EIO;
-out_bad_data:
- printk(KERN_ERR "smb_trans2: invalid data, disp=%u, cnt=%u, tot=%u, ofs=%u\n",
- data_disp, data_count, data_tot, data_offset);
-out_EIO:
- req->rq_errno = -EIO;
-out:
- return req->rq_errno;
-}
-
-/*
- * State machine for receiving responses. We handle the fact that we can't
- * read the full response in one try by having states telling us how much we
- * have read.
- *
- * Must be called with the server lock held (only called from smbiod).
- *
- * Return: <0 on error
- */
-int smb_request_recv(struct smb_sb_info *server)
-{
- struct smb_request *req = NULL;
- int result = 0;
-
- if (smb_recv_available(server) <= 0)
- return 0;
-
- VERBOSE("state: %d\n", server->rstate);
- switch (server->rstate) {
- case SMB_RECV_DROP:
- result = smb_receive_drop(server);
- if (result < 0)
- break;
- if (server->rstate == SMB_RECV_DROP)
- break;
- server->rstate = SMB_RECV_START;
- /* fallthrough */
- case SMB_RECV_START:
- server->smb_read = 0;
- server->rstate = SMB_RECV_HEADER;
- /* fallthrough */
- case SMB_RECV_HEADER:
- result = smb_receive_header(server);
- if (result < 0)
- break;
- if (server->rstate == SMB_RECV_HEADER)
- break;
- if (! (*(server->header + smb_flg) & SMB_FLAGS_REPLY) ) {
- server->rstate = SMB_RECV_REQUEST;
- break;
- }
- if (server->rstate != SMB_RECV_HCOMPLETE)
- break;
- /* fallthrough */
- case SMB_RECV_HCOMPLETE:
- req = find_request(server, WVAL(server->header, smb_mid));
- if (!req)
- break;
- smb_init_request(server, req);
- req->rq_rcls = *(req->rq_header + smb_rcls);
- req->rq_err = WVAL(req->rq_header, smb_err);
- if (server->rstate != SMB_RECV_PARAM)
- break;
- /* fallthrough */
- case SMB_RECV_PARAM:
- if (!req)
- req = find_request(server,WVAL(server->header,smb_mid));
- if (!req)
- break;
- result = smb_recv_param(server, req);
- if (result < 0)
- break;
- if (server->rstate != SMB_RECV_DATA)
- break;
- /* fallthrough */
- case SMB_RECV_DATA:
- if (!req)
- req = find_request(server,WVAL(server->header,smb_mid));
- if (!req)
- break;
- result = smb_recv_data(server, req);
- if (result < 0)
- break;
- break;
-
- /* We should never be called with any of these states */
- case SMB_RECV_END:
- case SMB_RECV_REQUEST:
- BUG();
- }
-
- if (result < 0) {
- /* We saw an error */
- return result;
- }
-
- if (server->rstate != SMB_RECV_END)
- return 0;
-
- result = 0;
- if (req->rq_trans2_command && req->rq_rcls == SUCCESS)
- result = smb_recv_trans2(server, req);
-
- /*
- * Response completely read. Drop any extra bytes sent by the server.
- * (Yes, servers sometimes add extra bytes to responses)
- */
- VERBOSE("smb_len: %d smb_read: %d\n",
- server->smb_len, server->smb_read);
- if (server->smb_read < server->smb_len)
- smb_receive_drop(server);
-
- server->rstate = SMB_RECV_START;
-
- if (!result) {
- list_del_init(&req->rq_queue);
- req->rq_flags |= SMB_REQ_RECEIVED;
- smb_rput(req);
- wake_up_interruptible(&req->rq_wait);
- }
- return 0;
-}
diff --git a/fs/smbfs/request.h b/fs/smbfs/request.h
deleted file mode 100644
index efb2145..0000000
--- a/fs/smbfs/request.h
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <linux/list.h>
-#include <linux/types.h>
-#include <linux/uio.h>
-#include <linux/wait.h>
-
-struct smb_request {
- struct list_head rq_queue; /* recvq or xmitq for the server */
-
- atomic_t rq_count;
-
- wait_queue_head_t rq_wait;
- int rq_flags;
- int rq_mid; /* multiplex ID, set by request.c */
-
- struct smb_sb_info *rq_server;
-
- /* header + word count + parameter words + byte count */
- unsigned char rq_header[SMB_HEADER_LEN + 20*2 + 2];
-
- int rq_bufsize;
- unsigned char *rq_buffer;
-
- /* FIXME: this is not good enough for merging IO requests. */
- unsigned char *rq_page;
- int rq_rsize;
-
- int rq_resp_wct;
- int rq_resp_bcc;
-
- int rq_rlen;
- int rq_bytes_recvd;
-
- int rq_slen;
- int rq_bytes_sent;
-
- int rq_iovlen;
- struct kvec rq_iov[4];
-
- int (*rq_setup_read) (struct smb_request *);
- void (*rq_callback) (struct smb_request *);
-
- /* ------ trans2 stuff ------ */
-
- u16 rq_trans2_command; /* 0 if not a trans2 request */
- unsigned int rq_ldata;
- unsigned char *rq_data;
- unsigned int rq_lparm;
- unsigned char *rq_parm;
-
- int rq_fragment;
- u32 rq_total_data;
- u32 rq_total_parm;
- int rq_trans2bufsize;
- unsigned char *rq_trans2buffer;
-
- /* ------ response ------ */
-
- unsigned short rq_rcls;
- unsigned short rq_err;
- int rq_errno;
-};
-
-#define SMB_REQ_STATIC 0x0001 /* rq_buffer is static */
-#define SMB_REQ_NORETRY 0x0002 /* request is invalid after retry */
-
-#define SMB_REQ_TRANSMITTED 0x4000 /* all data has been sent */
-#define SMB_REQ_RECEIVED 0x8000 /* reply received, smbiod is done */
-
-#define xSMB_REQ_NOREPLY 0x0004 /* we don't want the reply (if any) */
-#define xSMB_REQ_NORECEIVER 0x0008 /* caller doesn't wait for response */
diff --git a/fs/smbfs/smb_debug.h b/fs/smbfs/smb_debug.h
deleted file mode 100644
index fc4b1a5..0000000
--- a/fs/smbfs/smb_debug.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Defines some debug macros for smbfs.
- */
-
-/* This makes a dentry parent/child name pair. Useful for debugging printk's */
-#define DENTRY_PATH(dentry) \
- (dentry)->d_parent->d_name.name,(dentry)->d_name.name
-
-/*
- * safety checks that should never happen ???
- * these are normally enabled.
- */
-#ifdef SMBFS_PARANOIA
-# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __func__ , ## a)
-#else
-# define PARANOIA(f, a...) do { ; } while(0)
-#endif
-
-/* lots of debug messages */
-#ifdef SMBFS_DEBUG_VERBOSE
-# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
-#else
-# define VERBOSE(f, a...) do { ; } while(0)
-#endif
-
-/*
- * "normal" debug messages, but not with a normal DEBUG define ... way
- * too common name.
- */
-#ifdef SMBFS_DEBUG
-#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
-#else
-#define DEBUG1(f, a...) do { ; } while(0)
-#endif
diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c
deleted file mode 100644
index 0e39a92..0000000
--- a/fs/smbfs/smbiod.c
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- * smbiod.c
- *
- * Copyright (C) 2000, Charles Loep / Corel Corp.
- * Copyright (C) 2001, Urban Widmark
- */
-
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/file.h>
-#include <linux/dcache.h>
-#include <linux/module.h>
-#include <linux/net.h>
-#include <linux/kthread.h>
-#include <net/ip.h>
-
-#include <linux/smb_fs.h>
-#include <linux/smbno.h>
-#include <linux/smb_mount.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include "smb_debug.h"
-#include "request.h"
-#include "proto.h"
-
-enum smbiod_state {
- SMBIOD_DEAD,
- SMBIOD_STARTING,
- SMBIOD_RUNNING,
-};
-
-static enum smbiod_state smbiod_state = SMBIOD_DEAD;
-static struct task_struct *smbiod_thread;
-static DECLARE_WAIT_QUEUE_HEAD(smbiod_wait);
-static LIST_HEAD(smb_servers);
-static DEFINE_SPINLOCK(servers_lock);
-
-#define SMBIOD_DATA_READY (1<<0)
-static unsigned long smbiod_flags;
-
-static int smbiod(void *);
-static int smbiod_start(void);
-
-/*
- * called when there's work for us to do
- */
-void smbiod_wake_up(void)
-{
- if (smbiod_state == SMBIOD_DEAD)
- return;
- set_bit(SMBIOD_DATA_READY, &smbiod_flags);
- wake_up_interruptible(&smbiod_wait);
-}
-
-/*
- * start smbiod if none is running
- */
-static int smbiod_start(void)
-{
- struct task_struct *tsk;
- int err = 0;
-
- if (smbiod_state != SMBIOD_DEAD)
- return 0;
- smbiod_state = SMBIOD_STARTING;
- __module_get(THIS_MODULE);
- spin_unlock(&servers_lock);
- tsk = kthread_run(smbiod, NULL, "smbiod");
- if (IS_ERR(tsk)) {
- err = PTR_ERR(tsk);
- module_put(THIS_MODULE);
- }
-
- spin_lock(&servers_lock);
- if (err < 0) {
- smbiod_state = SMBIOD_DEAD;
- smbiod_thread = NULL;
- } else {
- smbiod_state = SMBIOD_RUNNING;
- smbiod_thread = tsk;
- }
- return err;
-}
-
-/*
- * register a server & start smbiod if necessary
- */
-int smbiod_register_server(struct smb_sb_info *server)
-{
- int ret;
- spin_lock(&servers_lock);
- list_add(&server->entry, &smb_servers);
- VERBOSE("%p\n", server);
- ret = smbiod_start();
- spin_unlock(&servers_lock);
- return ret;
-}
-
-/*
- * Unregister a server
- * Must be called with the server lock held.
- */
-void smbiod_unregister_server(struct smb_sb_info *server)
-{
- spin_lock(&servers_lock);
- list_del_init(&server->entry);
- VERBOSE("%p\n", server);
- spin_unlock(&servers_lock);
-
- smbiod_wake_up();
- smbiod_flush(server);
-}
-
-void smbiod_flush(struct smb_sb_info *server)
-{
- struct list_head *tmp, *n;
- struct smb_request *req;
-
- list_for_each_safe(tmp, n, &server->xmitq) {
- req = list_entry(tmp, struct smb_request, rq_queue);
- req->rq_errno = -EIO;
- list_del_init(&req->rq_queue);
- smb_rput(req);
- wake_up_interruptible(&req->rq_wait);
- }
- list_for_each_safe(tmp, n, &server->recvq) {
- req = list_entry(tmp, struct smb_request, rq_queue);
- req->rq_errno = -EIO;
- list_del_init(&req->rq_queue);
- smb_rput(req);
- wake_up_interruptible(&req->rq_wait);
- }
-}
-
-/*
- * Wake up smbmount and make it reconnect to the server.
- * This must be called with the server locked.
- *
- * FIXME: add smbconnect version to this
- */
-int smbiod_retry(struct smb_sb_info *server)
-{
- struct list_head *head;
- struct smb_request *req;
- struct pid *pid = get_pid(server->conn_pid);
- int result = 0;
-
- VERBOSE("state: %d\n", server->state);
- if (server->state == CONN_VALID || server->state == CONN_RETRYING)
- goto out;
-
- smb_invalidate_inodes(server);
-
- /*
- * Some requests are meaningless after a retry, so we abort them.
- * One example are all requests using 'fileid' since the files are
- * closed on retry.
- */
- head = server->xmitq.next;
- while (head != &server->xmitq) {
- req = list_entry(head, struct smb_request, rq_queue);
- head = head->next;
-
- req->rq_bytes_sent = 0;
- if (req->rq_flags & SMB_REQ_NORETRY) {
- VERBOSE("aborting request %p on xmitq\n", req);
- req->rq_errno = -EIO;
- list_del_init(&req->rq_queue);
- smb_rput(req);
- wake_up_interruptible(&req->rq_wait);
- }
- }
-
- /*
- * FIXME: test the code for retrying request we already sent
- */
- head = server->recvq.next;
- while (head != &server->recvq) {
- req = list_entry(head, struct smb_request, rq_queue);
- head = head->next;
-#if 0
- if (req->rq_flags & SMB_REQ_RETRY) {
- /* must move the request to the xmitq */
- VERBOSE("retrying request %p on recvq\n", req);
- list_move(&req->rq_queue, &server->xmitq);
- continue;
- }
-#endif
-
- VERBOSE("aborting request %p on recvq\n", req);
- /* req->rq_rcls = ???; */ /* FIXME: set smb error code too? */
- req->rq_errno = -EIO;
- list_del_init(&req->rq_queue);
- smb_rput(req);
- wake_up_interruptible(&req->rq_wait);
- }
-
- smb_close_socket(server);
-
- if (!pid) {
- /* FIXME: this is fatal, umount? */
- printk(KERN_ERR "smb_retry: no connection process\n");
- server->state = CONN_RETRIED;
- goto out;
- }
-
- /*
- * Change state so that only one retry per server will be started.
- */
- server->state = CONN_RETRYING;
-
- /*
- * Note: use the "priv" flag, as a user process may need to reconnect.
- */
- result = kill_pid(pid, SIGUSR1, 1);
- if (result) {
- /* FIXME: this is most likely fatal, umount? */
- printk(KERN_ERR "smb_retry: signal failed [%d]\n", result);
- goto out;
- }
- VERBOSE("signalled pid %d\n", pid_nr(pid));
-
- /* FIXME: The retried requests should perhaps get a "time boost". */
-
-out:
- put_pid(pid);
- return result;
-}
-
-/*
- * Currently handles lockingX packets.
- */
-static void smbiod_handle_request(struct smb_sb_info *server)
-{
- PARANOIA("smbiod got a request ... and we don't implement oplocks!\n");
- server->rstate = SMB_RECV_DROP;
-}
-
-/*
- * Do some IO for one server.
- */
-static void smbiod_doio(struct smb_sb_info *server)
-{
- int result;
- int maxwork = 7;
-
- if (server->state != CONN_VALID)
- goto out;
-
- do {
- result = smb_request_recv(server);
- if (result < 0) {
- server->state = CONN_INVALID;
- smbiod_retry(server);
- goto out; /* reconnecting is slow */
- } else if (server->rstate == SMB_RECV_REQUEST)
- smbiod_handle_request(server);
- } while (result > 0 && maxwork-- > 0);
-
- /*
- * If there is more to read then we want to be sure to wake up again.
- */
- if (server->state != CONN_VALID)
- goto out;
- if (smb_recv_available(server) > 0)
- set_bit(SMBIOD_DATA_READY, &smbiod_flags);
-
- do {
- result = smb_request_send_server(server);
- if (result < 0) {
- server->state = CONN_INVALID;
- smbiod_retry(server);
- goto out; /* reconnecting is slow */
- }
- } while (result > 0);
-
- /*
- * If the last request was not sent out we want to wake up again.
- */
- if (!list_empty(&server->xmitq))
- set_bit(SMBIOD_DATA_READY, &smbiod_flags);
-
-out:
- return;
-}
-
-/*
- * smbiod kernel thread
- */
-static int smbiod(void *unused)
-{
- VERBOSE("SMB Kernel thread starting (%d) ...\n", current->pid);
-
- for (;;) {
- struct smb_sb_info *server;
- struct list_head *pos, *n;
-
- /* FIXME: Use poll? */
- wait_event_interruptible(smbiod_wait,
- test_bit(SMBIOD_DATA_READY, &smbiod_flags));
- if (signal_pending(current)) {
- spin_lock(&servers_lock);
- smbiod_state = SMBIOD_DEAD;
- spin_unlock(&servers_lock);
- break;
- }
-
- clear_bit(SMBIOD_DATA_READY, &smbiod_flags);
-
- spin_lock(&servers_lock);
- if (list_empty(&smb_servers)) {
- smbiod_state = SMBIOD_DEAD;
- spin_unlock(&servers_lock);
- break;
- }
-
- list_for_each_safe(pos, n, &smb_servers) {
- server = list_entry(pos, struct smb_sb_info, entry);
- VERBOSE("checking server %p\n", server);
-
- if (server->state == CONN_VALID) {
- spin_unlock(&servers_lock);
-
- smb_lock_server(server);
- smbiod_doio(server);
- smb_unlock_server(server);
-
- spin_lock(&servers_lock);
- }
- }
- spin_unlock(&servers_lock);
- }
-
- VERBOSE("SMB Kernel thread exiting (%d) ...\n", current->pid);
- module_put_and_exit(0);
-}
diff --git a/fs/smbfs/sock.c b/fs/smbfs/sock.c
deleted file mode 100644
index e37fe4d..0000000
--- a/fs/smbfs/sock.c
+++ /dev/null
@@ -1,386 +0,0 @@
-/*
- * sock.c
- *
- * Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
- * Copyright (C) 1997 by Volker Lendecke
- *
- * Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/fs.h>
-#include <linux/time.h>
-#include <linux/errno.h>
-#include <linux/socket.h>
-#include <linux/fcntl.h>
-#include <linux/file.h>
-#include <linux/in.h>
-#include <linux/net.h>
-#include <linux/mm.h>
-#include <linux/netdevice.h>
-#include <linux/workqueue.h>
-#include <net/scm.h>
-#include <net/tcp_states.h>
-#include <net/ip.h>
-
-#include <linux/smb_fs.h>
-#include <linux/smb.h>
-#include <linux/smbno.h>
-
-#include <asm/uaccess.h>
-#include <asm/ioctls.h>
-
-#include "smb_debug.h"
-#include "proto.h"
-#include "request.h"
-
-
-static int
-_recvfrom(struct socket *socket, unsigned char *ubuf, int size, unsigned flags)
-{
- struct kvec iov = {ubuf, size};
- struct msghdr msg = {.msg_flags = flags};
- msg.msg_flags |= MSG_DONTWAIT | MSG_NOSIGNAL;
- return kernel_recvmsg(socket, &msg, &iov, 1, size, msg.msg_flags);
-}
-
-/*
- * Return the server this socket belongs to
- */
-static struct smb_sb_info *
-server_from_socket(struct socket *socket)
-{
- return socket->sk->sk_user_data;
-}
-
-/*
- * Called when there is data on the socket.
- */
-void
-smb_data_ready(struct sock *sk, int len)
-{
- struct smb_sb_info *server = server_from_socket(sk->sk_socket);
- void (*data_ready)(struct sock *, int) = server->data_ready;
-
- data_ready(sk, len);
- VERBOSE("(%p, %d)\n", sk, len);
- smbiod_wake_up();
-}
-
-int
-smb_valid_socket(struct inode * inode)
-{
- return (inode && S_ISSOCK(inode->i_mode) &&
- SOCKET_I(inode)->type == SOCK_STREAM);
-}
-
-static struct socket *
-server_sock(struct smb_sb_info *server)
-{
- struct file *file;
-
- if (server && (file = server->sock_file))
- {
-#ifdef SMBFS_PARANOIA
- if (!smb_valid_socket(file->f_path.dentry->d_inode))
- PARANOIA("bad socket!\n");
-#endif
- return SOCKET_I(file->f_path.dentry->d_inode);
- }
- return NULL;
-}
-
-void
-smb_close_socket(struct smb_sb_info *server)
-{
- struct file * file = server->sock_file;
-
- if (file) {
- struct socket *sock = server_sock(server);
-
- VERBOSE("closing socket %p\n", sock);
- sock->sk->sk_data_ready = server->data_ready;
- server->sock_file = NULL;
- fput(file);
- }
-}
-
-static int
-smb_get_length(struct socket *socket, unsigned char *header)
-{
- int result;
-
- result = _recvfrom(socket, header, 4, MSG_PEEK);
- if (result == -EAGAIN)
- return -ENODATA;
- if (result < 0) {
- PARANOIA("recv error = %d\n", -result);
- return result;
- }
- if (result < 4)
- return -ENODATA;
-
- switch (header[0]) {
- case 0x00:
- case 0x82:
- break;
-
- case 0x85:
- DEBUG1("Got SESSION KEEP ALIVE\n");
- _recvfrom(socket, header, 4, 0); /* read away */
- return -ENODATA;
-
- default:
- PARANOIA("Invalid NBT packet, code=%x\n", header[0]);
- return -EIO;
- }
-
- /* The length in the RFC NB header is the raw data length */
- return smb_len(header);
-}
-
-int
-smb_recv_available(struct smb_sb_info *server)
-{
- mm_segment_t oldfs;
- int avail, err;
- struct socket *sock = server_sock(server);
-
- oldfs = get_fs();
- set_fs(get_ds());
- err = sock->ops->ioctl(sock, SIOCINQ, (unsigned long) &avail);
- set_fs(oldfs);
- return (err >= 0) ? avail : err;
-}
-
-/*
- * Adjust the kvec to move on 'n' bytes (from nfs/sunrpc)
- */
-static int
-smb_move_iov(struct kvec **data, size_t *num, struct kvec *vec, unsigned amount)
-{
- struct kvec *iv = *data;
- int i;
- int len;
-
- /*
- * Eat any sent kvecs
- */
- while (iv->iov_len <= amount) {
- amount -= iv->iov_len;
- iv++;
- (*num)--;
- }
-
- /*
- * And chew down the partial one
- */
- vec[0].iov_len = iv->iov_len-amount;
- vec[0].iov_base =((unsigned char *)iv->iov_base)+amount;
- iv++;
-
- len = vec[0].iov_len;
-
- /*
- * And copy any others
- */
- for (i = 1; i < *num; i++) {
- vec[i] = *iv++;
- len += vec[i].iov_len;
- }
-
- *data = vec;
- return len;
-}
-
-/*
- * smb_receive_header
- * Only called by the smbiod thread.
- */
-int
-smb_receive_header(struct smb_sb_info *server)
-{
- struct socket *sock;
- int result = 0;
- unsigned char peek_buf[4];
-
- result = -EIO;
- sock = server_sock(server);
- if (!sock)
- goto out;
- if (sock->sk->sk_state != TCP_ESTABLISHED)
- goto out;
-
- if (!server->smb_read) {
- result = smb_get_length(sock, peek_buf);
- if (result < 0) {
- if (result == -ENODATA)
- result = 0;
- goto out;
- }
- server->smb_len = result + 4;
-
- if (server->smb_len < SMB_HEADER_LEN) {
- PARANOIA("short packet: %d\n", result);
- server->rstate = SMB_RECV_DROP;
- result = -EIO;
- goto out;
- }
- if (server->smb_len > SMB_MAX_PACKET_SIZE) {
- PARANOIA("long packet: %d\n", result);
- server->rstate = SMB_RECV_DROP;
- result = -EIO;
- goto out;
- }
- }
-
- result = _recvfrom(sock, server->header + server->smb_read,
- SMB_HEADER_LEN - server->smb_read, 0);
- VERBOSE("_recvfrom: %d\n", result);
- if (result < 0) {
- VERBOSE("receive error: %d\n", result);
- goto out;
- }
- server->smb_read += result;
-
- if (server->smb_read == SMB_HEADER_LEN)
- server->rstate = SMB_RECV_HCOMPLETE;
-out:
- return result;
-}
-
-static char drop_buffer[PAGE_SIZE];
-
-/*
- * smb_receive_drop - read and throw away the data
- * Only called by the smbiod thread.
- *
- * FIXME: we are in the kernel, could we just tell the socket that we want
- * to drop stuff from the buffer?
- */
-int
-smb_receive_drop(struct smb_sb_info *server)
-{
- struct socket *sock;
- unsigned int flags;
- struct kvec iov;
- struct msghdr msg;
- int rlen = smb_len(server->header) - server->smb_read + 4;
- int result = -EIO;
-
- if (rlen > PAGE_SIZE)
- rlen = PAGE_SIZE;
-
- sock = server_sock(server);
- if (!sock)
- goto out;
- if (sock->sk->sk_state != TCP_ESTABLISHED)
- goto out;
-
- flags = MSG_DONTWAIT | MSG_NOSIGNAL;
- iov.iov_base = drop_buffer;
- iov.iov_len = PAGE_SIZE;
- msg.msg_flags = flags;
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
- msg.msg_control = NULL;
-
- result = kernel_recvmsg(sock, &msg, &iov, 1, rlen, flags);
-
- VERBOSE("read: %d\n", result);
- if (result < 0) {
- VERBOSE("receive error: %d\n", result);
- goto out;
- }
- server->smb_read += result;
-
- if (server->smb_read >= server->smb_len)
- server->rstate = SMB_RECV_END;
-
-out:
- return result;
-}
-
-/*
- * smb_receive
- * Only called by the smbiod thread.
- */
-int
-smb_receive(struct smb_sb_info *server, struct smb_request *req)
-{
- struct socket *sock;
- unsigned int flags;
- struct kvec iov[4];
- struct kvec *p = req->rq_iov;
- size_t num = req->rq_iovlen;
- struct msghdr msg;
- int rlen;
- int result = -EIO;
-
- sock = server_sock(server);
- if (!sock)
- goto out;
- if (sock->sk->sk_state != TCP_ESTABLISHED)
- goto out;
-
- flags = MSG_DONTWAIT | MSG_NOSIGNAL;
- msg.msg_flags = flags;
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
- msg.msg_control = NULL;
-
- /* Dont repeat bytes and count available bufferspace */
- rlen = min_t(int, smb_move_iov(&p, &num, iov, req->rq_bytes_recvd),
- (req->rq_rlen - req->rq_bytes_recvd));
-
- result = kernel_recvmsg(sock, &msg, p, num, rlen, flags);
-
- VERBOSE("read: %d\n", result);
- if (result < 0) {
- VERBOSE("receive error: %d\n", result);
- goto out;
- }
- req->rq_bytes_recvd += result;
- server->smb_read += result;
-
-out:
- return result;
-}
-
-/*
- * Try to send a SMB request. This may return after sending only parts of the
- * request. SMB_REQ_TRANSMITTED will be set if a request was fully sent.
- *
- * Parts of this was taken from xprt_sendmsg from net/sunrpc/xprt.c
- */
-int
-smb_send_request(struct smb_request *req)
-{
- struct smb_sb_info *server = req->rq_server;
- struct socket *sock;
- struct msghdr msg = {.msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT};
- int slen = req->rq_slen - req->rq_bytes_sent;
- int result = -EIO;
- struct kvec iov[4];
- struct kvec *p = req->rq_iov;
- size_t num = req->rq_iovlen;
-
- sock = server_sock(server);
- if (!sock)
- goto out;
- if (sock->sk->sk_state != TCP_ESTABLISHED)
- goto out;
-
- /* Dont repeat bytes */
- if (req->rq_bytes_sent)
- smb_move_iov(&p, &num, iov, req->rq_bytes_sent);
-
- result = kernel_sendmsg(sock, &msg, p, num, slen);
-
- if (result >= 0) {
- req->rq_bytes_sent += result;
- if (req->rq_bytes_sent >= req->rq_slen)
- req->rq_flags |= SMB_REQ_TRANSMITTED;
- }
-out:
- return result;
-}
diff --git a/fs/smbfs/symlink.c b/fs/smbfs/symlink.c
deleted file mode 100644
index 00b2909..0000000
--- a/fs/smbfs/symlink.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * symlink.c
- *
- * Copyright (C) 2002 by John Newbigin
- *
- * Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/stat.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/pagemap.h>
-#include <linux/net.h>
-#include <linux/namei.h>
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
-#include <linux/smbno.h>
-#include <linux/smb_fs.h>
-
-#include "smb_debug.h"
-#include "proto.h"
-
-int smb_symlink(struct inode *inode, struct dentry *dentry, const char *oldname)
-{
- DEBUG1("create symlink %s -> %s/%s\n", oldname, DENTRY_PATH(dentry));
-
- return smb_proc_symlink(server_from_dentry(dentry), dentry, oldname);
-}
-
-static void *smb_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- char *link = __getname();
- DEBUG1("followlink of %s/%s\n", DENTRY_PATH(dentry));
-
- if (!link) {
- link = ERR_PTR(-ENOMEM);
- } else {
- int len = smb_proc_read_link(server_from_dentry(dentry),
- dentry, link, PATH_MAX - 1);
- if (len < 0) {
- __putname(link);
- link = ERR_PTR(len);
- } else {
- link[len] = 0;
- }
- }
- nd_set_link(nd, link);
- return NULL;
-}
-
-static void smb_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
-{
- char *s = nd_get_link(nd);
- if (!IS_ERR(s))
- __putname(s);
-}
-
-const struct inode_operations smb_link_inode_operations =
-{
- .readlink = generic_readlink,
- .follow_link = smb_follow_link,
- .put_link = smb_put_link,
-};
diff --git a/fs/splice.c b/fs/splice.c
index 8f1dfae..ce2f025 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1311,18 +1311,6 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
struct pipe_inode_info *opipe,
size_t len, unsigned int flags);
-/*
- * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
- * location, so checking ->i_pipe is not enough to verify that this is a
- * pipe.
- */
-static inline struct pipe_inode_info *pipe_info(struct inode *inode)
-{
- if (S_ISFIFO(inode->i_mode))
- return inode->i_pipe;
-
- return NULL;
-}
/*
* Determine where to splice to/from.
@@ -1336,8 +1324,8 @@ static long do_splice(struct file *in, loff_t __user *off_in,
loff_t offset, *off;
long ret;
- ipipe = pipe_info(in->f_path.dentry->d_inode);
- opipe = pipe_info(out->f_path.dentry->d_inode);
+ ipipe = get_pipe_info(in);
+ opipe = get_pipe_info(out);
if (ipipe && opipe) {
if (off_in || off_out)
@@ -1555,7 +1543,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
int error;
long ret;
- pipe = pipe_info(file->f_path.dentry->d_inode);
+ pipe = get_pipe_info(file);
if (!pipe)
return -EBADF;
@@ -1642,7 +1630,7 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
};
long ret;
- pipe = pipe_info(file->f_path.dentry->d_inode);
+ pipe = get_pipe_info(file);
if (!pipe)
return -EBADF;
@@ -2022,8 +2010,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
static long do_tee(struct file *in, struct file *out, size_t len,
unsigned int flags)
{
- struct pipe_inode_info *ipipe = pipe_info(in->f_path.dentry->d_inode);
- struct pipe_inode_info *opipe = pipe_info(out->f_path.dentry->d_inode);
+ struct pipe_inode_info *ipipe = get_pipe_info(in);
+ struct pipe_inode_info *opipe = get_pipe_info(out);
int ret = -EINVAL;
/*
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 07a4f11..20700b9 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -370,12 +370,10 @@ static void squashfs_put_super(struct super_block *sb)
}
-static int squashfs_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data,
- struct vfsmount *mnt)
+static struct dentry *squashfs_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, squashfs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, squashfs_fill_super);
}
@@ -442,16 +440,23 @@ static struct inode *squashfs_alloc_inode(struct super_block *sb)
}
-static void squashfs_destroy_inode(struct inode *inode)
+static void squashfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode));
}
+static void squashfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, squashfs_i_callback);
+}
+
static struct file_system_type squashfs_fs_type = {
.owner = THIS_MODULE,
.name = "squashfs",
- .get_sb = squashfs_get_sb,
+ .mount = squashfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV
};
diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c
index 652b854..3876c36 100644
--- a/fs/squashfs/xattr.c
+++ b/fs/squashfs/xattr.c
@@ -158,17 +158,18 @@ static int squashfs_xattr_get(struct inode *inode, int name_index,
strncmp(target, name, name_size) == 0) {
/* found xattr */
if (type & SQUASHFS_XATTR_VALUE_OOL) {
- __le64 xattr;
+ __le64 xattr_val;
+ u64 xattr;
/* val is a reference to the real location */
err = squashfs_read_metadata(sb, &val, &start,
&offset, sizeof(val));
if (err < 0)
goto failed;
- err = squashfs_read_metadata(sb, &xattr, &start,
- &offset, sizeof(xattr));
+ err = squashfs_read_metadata(sb, &xattr_val,
+ &start, &offset, sizeof(xattr_val));
if (err < 0)
goto failed;
- xattr = le64_to_cpu(xattr);
+ xattr = le64_to_cpu(xattr_val);
start = SQUASHFS_XATTR_BLK(xattr) +
msblk->xattr_table;
offset = SQUASHFS_XATTR_OFFSET(xattr);
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h
index 49fe0d7..b634efc 100644
--- a/fs/squashfs/xattr.h
+++ b/fs/squashfs/xattr.h
@@ -25,7 +25,7 @@
extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64,
u64 *, int *);
extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *,
- int *, unsigned long long *);
+ unsigned int *, unsigned long long *);
#else
static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
u64 start, u64 *xattr_table_start, int *xattr_ids)
@@ -35,7 +35,7 @@ static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
}
static inline int squashfs_xattr_lookup(struct super_block *sb,
- unsigned int index, int *count, int *size,
+ unsigned int index, int *count, unsigned int *size,
unsigned long long *xattr)
{
return 0;
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index cfb4110..d33be5d 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -34,6 +34,7 @@
#include "squashfs_fs_sb.h"
#include "squashfs_fs_i.h"
#include "squashfs.h"
+#include "xattr.h"
/*
* Map xattr id using the xattr id look up table
diff --git a/fs/super.c b/fs/super.c
index b9c9869..823e061 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -30,6 +30,7 @@
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/backing-dev.h>
+#include <linux/rculist_bl.h>
#include "internal.h"
@@ -71,7 +72,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
INIT_LIST_HEAD(&s->s_files);
#endif
INIT_LIST_HEAD(&s->s_instances);
- INIT_HLIST_HEAD(&s->s_anon);
+ INIT_HLIST_BL_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
INIT_LIST_HEAD(&s->s_dentry_lru);
init_rwsem(&s->s_umount);
@@ -715,15 +716,14 @@ static int ns_set_super(struct super_block *sb, void *data)
return set_anon_super(sb, NULL);
}
-int get_sb_ns(struct file_system_type *fs_type, int flags, void *data,
- int (*fill_super)(struct super_block *, void *, int),
- struct vfsmount *mnt)
+struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
+ void *data, int (*fill_super)(struct super_block *, void *, int))
{
struct super_block *sb;
sb = sget(fs_type, ns_test_super, ns_set_super, data);
if (IS_ERR(sb))
- return PTR_ERR(sb);
+ return ERR_CAST(sb);
if (!sb->s_root) {
int err;
@@ -731,17 +731,16 @@ int get_sb_ns(struct file_system_type *fs_type, int flags, void *data,
err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
if (err) {
deactivate_locked_super(sb);
- return err;
+ return ERR_PTR(err);
}
sb->s_flags |= MS_ACTIVE;
}
- simple_set_mnt(mnt, sb);
- return 0;
+ return dget(sb->s_root);
}
-EXPORT_SYMBOL(get_sb_ns);
+EXPORT_SYMBOL(mount_ns);
#ifdef CONFIG_BLOCK
static int set_bdev_super(struct super_block *s, void *data)
@@ -762,10 +761,9 @@ static int test_bdev_super(struct super_block *s, void *data)
return (void *)s->s_bdev == data;
}
-int get_sb_bdev(struct file_system_type *fs_type,
+struct dentry *mount_bdev(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data,
- int (*fill_super)(struct super_block *, void *, int),
- struct vfsmount *mnt)
+ int (*fill_super)(struct super_block *, void *, int))
{
struct block_device *bdev;
struct super_block *s;
@@ -777,7 +775,7 @@ int get_sb_bdev(struct file_system_type *fs_type,
bdev = open_bdev_exclusive(dev_name, mode, fs_type);
if (IS_ERR(bdev))
- return PTR_ERR(bdev);
+ return ERR_CAST(bdev);
/*
* once the super is inserted into the list by sget, s_umount
@@ -829,15 +827,30 @@ int get_sb_bdev(struct file_system_type *fs_type,
bdev->bd_super = s;
}
- simple_set_mnt(mnt, s);
- return 0;
+ return dget(s->s_root);
error_s:
error = PTR_ERR(s);
error_bdev:
close_bdev_exclusive(bdev, mode);
error:
- return error;
+ return ERR_PTR(error);
+}
+EXPORT_SYMBOL(mount_bdev);
+
+int get_sb_bdev(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data,
+ int (*fill_super)(struct super_block *, void *, int),
+ struct vfsmount *mnt)
+{
+ struct dentry *root;
+
+ root = mount_bdev(fs_type, flags, dev_name, data, fill_super);
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+ mnt->mnt_root = root;
+ mnt->mnt_sb = root->d_sb;
+ return 0;
}
EXPORT_SYMBOL(get_sb_bdev);
@@ -856,29 +869,42 @@ void kill_block_super(struct super_block *sb)
EXPORT_SYMBOL(kill_block_super);
#endif
-int get_sb_nodev(struct file_system_type *fs_type,
+struct dentry *mount_nodev(struct file_system_type *fs_type,
int flags, void *data,
- int (*fill_super)(struct super_block *, void *, int),
- struct vfsmount *mnt)
+ int (*fill_super)(struct super_block *, void *, int))
{
int error;
struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
if (IS_ERR(s))
- return PTR_ERR(s);
+ return ERR_CAST(s);
s->s_flags = flags;
error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
if (error) {
deactivate_locked_super(s);
- return error;
+ return ERR_PTR(error);
}
s->s_flags |= MS_ACTIVE;
- simple_set_mnt(mnt, s);
- return 0;
+ return dget(s->s_root);
}
+EXPORT_SYMBOL(mount_nodev);
+int get_sb_nodev(struct file_system_type *fs_type,
+ int flags, void *data,
+ int (*fill_super)(struct super_block *, void *, int),
+ struct vfsmount *mnt)
+{
+ struct dentry *root;
+
+ root = mount_nodev(fs_type, flags, data, fill_super);
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+ mnt->mnt_root = root;
+ mnt->mnt_sb = root->d_sb;
+ return 0;
+}
EXPORT_SYMBOL(get_sb_nodev);
static int compare_single(struct super_block *s, void *p)
@@ -886,29 +912,42 @@ static int compare_single(struct super_block *s, void *p)
return 1;
}
-int get_sb_single(struct file_system_type *fs_type,
+struct dentry *mount_single(struct file_system_type *fs_type,
int flags, void *data,
- int (*fill_super)(struct super_block *, void *, int),
- struct vfsmount *mnt)
+ int (*fill_super)(struct super_block *, void *, int))
{
struct super_block *s;
int error;
s = sget(fs_type, compare_single, set_anon_super, NULL);
if (IS_ERR(s))
- return PTR_ERR(s);
+ return ERR_CAST(s);
if (!s->s_root) {
s->s_flags = flags;
error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
if (error) {
deactivate_locked_super(s);
- return error;
+ return ERR_PTR(error);
}
s->s_flags |= MS_ACTIVE;
} else {
do_remount_sb(s, flags, data, 0);
}
- simple_set_mnt(mnt, s);
+ return dget(s->s_root);
+}
+EXPORT_SYMBOL(mount_single);
+
+int get_sb_single(struct file_system_type *fs_type,
+ int flags, void *data,
+ int (*fill_super)(struct super_block *, void *, int),
+ struct vfsmount *mnt)
+{
+ struct dentry *root;
+ root = mount_single(fs_type, flags, data, fill_super);
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+ mnt->mnt_root = root;
+ mnt->mnt_sb = root->d_sb;
return 0;
}
@@ -918,6 +957,7 @@ struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
struct vfsmount *mnt;
+ struct dentry *root;
char *secdata = NULL;
int error;
@@ -942,9 +982,19 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
goto out_free_secdata;
}
- error = type->get_sb(type, flags, name, data, mnt);
- if (error < 0)
- goto out_free_secdata;
+ if (type->mount) {
+ root = type->mount(type, flags, name, data);
+ if (IS_ERR(root)) {
+ error = PTR_ERR(root);
+ goto out_free_secdata;
+ }
+ mnt->mnt_root = root;
+ mnt->mnt_sb = root->d_sb;
+ } else {
+ error = type->get_sb(type, flags, name, data, mnt);
+ if (error < 0)
+ goto out_free_secdata;
+ }
BUG_ON(!mnt->mnt_sb);
WARN_ON(!mnt->mnt_sb->s_bdi);
mnt->mnt_sb->s_flags |= MS_BORN;
@@ -1090,7 +1140,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
return mnt;
err:
- mntput(mnt);
+ mntput_long(mnt);
return ERR_PTR(err);
}
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 7e54bac..ea9120a 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -231,7 +231,7 @@ void release_sysfs_dirent(struct sysfs_dirent * sd)
goto repeat;
}
-static int sysfs_dentry_delete(struct dentry *dentry)
+static int sysfs_dentry_delete(const struct dentry *dentry)
{
struct sysfs_dirent *sd = dentry->d_fsdata;
return !!(sd->s_flags & SYSFS_FLAG_REMOVED);
@@ -239,9 +239,13 @@ static int sysfs_dentry_delete(struct dentry *dentry)
static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- struct sysfs_dirent *sd = dentry->d_fsdata;
+ struct sysfs_dirent *sd;
int is_dir;
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ sd = dentry->d_fsdata;
mutex_lock(&sysfs_mutex);
/* The sysfs dirent has been deleted */
@@ -701,7 +705,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
/* instantiate and hash dentry */
ret = d_find_alias(inode);
if (!ret) {
- dentry->d_op = &sysfs_dentry_ops;
+ d_set_d_op(dentry, &sysfs_dentry_ops);
dentry->d_fsdata = sysfs_get(sd);
d_add(dentry, inode);
} else {
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 442f34f..c8769dc2 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -165,10 +165,7 @@ int sysfs_merge_group(struct kobject *kobj,
struct attribute *const *attr;
int i;
- if (grp)
- dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
- else
- dir_sd = sysfs_get(kobj->sd);
+ dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
if (!dir_sd)
return -ENOENT;
@@ -195,10 +192,7 @@ void sysfs_unmerge_group(struct kobject *kobj,
struct sysfs_dirent *dir_sd;
struct attribute *const *attr;
- if (grp)
- dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
- else
- dir_sd = sysfs_get(kobj->sd);
+ dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
if (dir_sd) {
for (attr = grp->attrs; *attr; ++attr)
sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index cffb1fd..0a12eb8 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -19,6 +19,7 @@
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/sysfs.h>
#include <linux/xattr.h>
#include <linux/security.h>
#include "sysfs.h"
@@ -348,13 +349,18 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const cha
return -ENOENT;
}
-int sysfs_permission(struct inode *inode, int mask)
+int sysfs_permission(struct inode *inode, int mask, unsigned int flags)
{
- struct sysfs_dirent *sd = inode->i_private;
+ struct sysfs_dirent *sd;
+
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
+ sd = inode->i_private;
mutex_lock(&sysfs_mutex);
sysfs_refresh_inode(sd, inode);
mutex_unlock(&sysfs_mutex);
- return generic_permission(inode, mask, NULL);
+ return generic_permission(inode, mask, flags, NULL);
}
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index f2af225..2668957 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -23,7 +23,7 @@
#include "sysfs.h"
-static struct vfsmount *sysfs_mount;
+static struct vfsmount *sysfs_mnt;
struct kmem_cache *sysfs_dir_cachep;
static const struct super_operations sysfs_ops = {
@@ -95,18 +95,17 @@ static int sysfs_set_super(struct super_block *sb, void *data)
return error;
}
-static int sysfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *sysfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
struct sysfs_super_info *info;
enum kobj_ns_type type;
struct super_block *sb;
int error;
- error = -ENOMEM;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
- goto out;
+ return ERR_PTR(-ENOMEM);
for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
info->ns[type] = kobj_ns_current(type);
@@ -114,24 +113,19 @@ static int sysfs_get_sb(struct file_system_type *fs_type,
sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info);
if (IS_ERR(sb) || sb->s_fs_info != info)
kfree(info);
- if (IS_ERR(sb)) {
- error = PTR_ERR(sb);
- goto out;
- }
+ if (IS_ERR(sb))
+ return ERR_CAST(sb);
if (!sb->s_root) {
sb->s_flags = flags;
error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
if (error) {
deactivate_locked_super(sb);
- goto out;
+ return ERR_PTR(error);
}
sb->s_flags |= MS_ACTIVE;
}
- simple_set_mnt(mnt, sb);
- error = 0;
-out:
- return error;
+ return dget(sb->s_root);
}
static void sysfs_kill_sb(struct super_block *sb)
@@ -147,7 +141,7 @@ static void sysfs_kill_sb(struct super_block *sb)
static struct file_system_type sysfs_fs_type = {
.name = "sysfs",
- .get_sb = sysfs_get_sb,
+ .mount = sysfs_mount,
.kill_sb = sysfs_kill_sb,
};
@@ -189,11 +183,11 @@ int __init sysfs_init(void)
err = register_filesystem(&sysfs_fs_type);
if (!err) {
- sysfs_mount = kern_mount(&sysfs_fs_type);
- if (IS_ERR(sysfs_mount)) {
+ sysfs_mnt = kern_mount(&sysfs_fs_type);
+ if (IS_ERR(sysfs_mnt)) {
printk(KERN_ERR "sysfs: could not mount!\n");
- err = PTR_ERR(sysfs_mount);
- sysfs_mount = NULL;
+ err = PTR_ERR(sysfs_mnt);
+ sysfs_mnt = NULL;
unregister_filesystem(&sysfs_fs_type);
goto out_err;
}
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index d9be60a..3d28af31 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -9,6 +9,7 @@
*/
#include <linux/lockdep.h>
+#include <linux/kobject_ns.h>
#include <linux/fs.h>
struct sysfs_open_dirent;
@@ -200,7 +201,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd);
void sysfs_evict_inode(struct inode *inode);
int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
-int sysfs_permission(struct inode *inode, int mask);
+int sysfs_permission(struct inode *inode, int mask, unsigned int flags);
int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index de44d06..0630eb96 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -333,11 +333,18 @@ static struct inode *sysv_alloc_inode(struct super_block *sb)
return &si->vfs_inode;
}
-static void sysv_destroy_inode(struct inode *inode)
+static void sysv_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
}
+static void sysv_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, sysv_i_callback);
+}
+
static void init_once(void *p)
{
struct sysv_inode_info *si = (struct sysv_inode_info *)p;
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 11e7f7d..b5e68da 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -27,7 +27,8 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
return err;
}
-static int sysv_hash(struct dentry *dentry, struct qstr *qstr)
+static int sysv_hash(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
/* Truncate the name in place, avoids having to define a compare
function. */
@@ -47,7 +48,7 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, st
struct inode * inode = NULL;
ino_t ino;
- dentry->d_op = dir->i_sb->s_root->d_op;
+ d_set_d_op(dentry, dir->i_sb->s_root->d_op);
if (dentry->d_name.len > SYSV_NAMELEN)
return ERR_PTR(-ENAMETOOLONG);
ino = sysv_inode_by_name(dentry);
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index a0b0cda..76712ae 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -346,7 +346,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
if (sbi->s_forced_ro)
sb->s_flags |= MS_RDONLY;
if (sbi->s_truncate)
- sb->s_root->d_op = &sysv_dentry_operations;
+ d_set_d_op(sb->s_root, &sysv_dentry_operations);
return 1;
}
@@ -526,23 +526,22 @@ failed:
/* Every kernel module contains stuff like this. */
-static int sysv_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *sysv_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, sysv_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, sysv_fill_super);
}
-static int v7_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *v7_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, v7_fill_super, mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, v7_fill_super);
}
static struct file_system_type sysv_fs_type = {
.owner = THIS_MODULE,
.name = "sysv",
- .get_sb = sysv_get_sb,
+ .mount = sysv_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
@@ -550,7 +549,7 @@ static struct file_system_type sysv_fs_type = {
static struct file_system_type v7_fs_type = {
.owner = THIS_MODULE,
.name = "v7",
- .get_sb = v7_get_sb,
+ .mount = v7_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 9a47c9f..6e11c29 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -272,12 +272,20 @@ static struct inode *ubifs_alloc_inode(struct super_block *sb)
return &ui->vfs_inode;
};
+static void ubifs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct ubifs_inode *ui = ubifs_inode(inode);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ubifs_inode_slab, ui);
+}
+
static void ubifs_destroy_inode(struct inode *inode)
{
struct ubifs_inode *ui = ubifs_inode(inode);
kfree(ui->data);
- kmem_cache_free(ubifs_inode_slab, inode);
+ call_rcu(&inode->i_rcu, ubifs_i_callback);
}
/*
@@ -2038,8 +2046,8 @@ static int sb_test(struct super_block *sb, void *data)
return c->vi.cdev == *dev;
}
-static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
- const char *name, void *data, struct vfsmount *mnt)
+static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
+ const char *name, void *data)
{
struct ubi_volume_desc *ubi;
struct ubi_volume_info vi;
@@ -2057,7 +2065,7 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
if (IS_ERR(ubi)) {
dbg_err("cannot open \"%s\", error %d",
name, (int)PTR_ERR(ubi));
- return PTR_ERR(ubi);
+ return ERR_CAST(ubi);
}
ubi_get_volume_info(ubi, &vi);
@@ -2095,20 +2103,19 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
/* 'fill_super()' opens ubi again so we must close it here */
ubi_close_volume(ubi);
- simple_set_mnt(mnt, sb);
- return 0;
+ return dget(sb->s_root);
out_deact:
deactivate_locked_super(sb);
out_close:
ubi_close_volume(ubi);
- return err;
+ return ERR_PTR(err);
}
static struct file_system_type ubifs_fs_type = {
.name = "ubifs",
.owner = THIS_MODULE,
- .get_sb = ubifs_get_sb,
+ .mount = ubifs_mount,
.kill_sb = kill_anon_super,
};
diff --git a/fs/udf/Kconfig b/fs/udf/Kconfig
index f8def3c..0e0e99b 100644
--- a/fs/udf/Kconfig
+++ b/fs/udf/Kconfig
@@ -1,6 +1,5 @@
config UDF_FS
tristate "UDF file system support"
- depends on BKL # needs serious work to remove
select CRC_ITU_T
help
This is the new file system used on some CD-ROMs and DVDs. Say Y if
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index b608efa..306ee39 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -157,10 +157,9 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
udf_debug("bit %ld already set\n", bit + i);
udf_debug("byte=%2x\n",
((char *)bh->b_data)[(bit + i) >> 3]);
- } else {
- udf_add_free_space(sb, sbi->s_partition, 1);
}
}
+ udf_add_free_space(sb, sbi->s_partition, count);
mark_buffer_dirty(bh);
if (overflow) {
block += count;
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 51552bf..eb8bfe2 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -30,7 +30,6 @@
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
-#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include "udf_i.h"
@@ -190,18 +189,14 @@ static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir)
struct inode *dir = filp->f_path.dentry->d_inode;
int result;
- lock_kernel();
-
if (filp->f_pos == 0) {
if (filldir(dirent, ".", 1, filp->f_pos, dir->i_ino, DT_DIR) < 0) {
- unlock_kernel();
return 0;
}
filp->f_pos++;
}
result = do_udf_readdir(dir, filp, filldir, dirent);
- unlock_kernel();
return result;
}
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 66b9e7e..89c7848 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -32,7 +32,6 @@
#include <linux/string.h> /* memset */
#include <linux/capability.h>
#include <linux/errno.h>
-#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/aio.h>
@@ -114,6 +113,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
size_t count = iocb->ki_left;
struct udf_inode_info *iinfo = UDF_I(inode);
+ down_write(&iinfo->i_data_sem);
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
if (file->f_flags & O_APPEND)
pos = inode->i_size;
@@ -126,6 +126,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
udf_expand_file_adinicb(inode, pos + count, &err);
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
udf_debug("udf_expand_adinicb: err=%d\n", err);
+ up_write(&iinfo->i_data_sem);
return err;
}
} else {
@@ -135,6 +136,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
iinfo->i_lenAlloc = inode->i_size;
}
}
+ up_write(&iinfo->i_data_sem);
retval = generic_file_aio_write(iocb, iov, nr_segs, ppos);
if (retval > 0)
@@ -149,8 +151,6 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
long old_block, new_block;
int result = -EINVAL;
- lock_kernel();
-
if (file_permission(filp, MAY_READ) != 0) {
udf_debug("no permission to access inode %lu\n", inode->i_ino);
result = -EPERM;
@@ -196,7 +196,6 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
out:
- unlock_kernel();
return result;
}
@@ -204,10 +203,10 @@ static int udf_release_file(struct inode *inode, struct file *filp)
{
if (filp->f_mode & FMODE_WRITE) {
mutex_lock(&inode->i_mutex);
- lock_kernel();
+ down_write(&UDF_I(inode)->i_data_sem);
udf_discard_prealloc(inode);
udf_truncate_tail_extent(inode);
- unlock_kernel();
+ up_write(&UDF_I(inode)->i_data_sem);
mutex_unlock(&inode->i_mutex);
}
return 0;
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 75d9304..6fb7e0a 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -92,28 +92,19 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
return NULL;
}
- mutex_lock(&sbi->s_alloc_mutex);
if (sbi->s_lvid_bh) {
- struct logicalVolIntegrityDesc *lvid =
- (struct logicalVolIntegrityDesc *)
- sbi->s_lvid_bh->b_data;
- struct logicalVolIntegrityDescImpUse *lvidiu =
- udf_sb_lvidiu(sbi);
- struct logicalVolHeaderDesc *lvhd;
- uint64_t uniqueID;
- lvhd = (struct logicalVolHeaderDesc *)
- (lvid->logicalVolContentsUse);
+ struct logicalVolIntegrityDescImpUse *lvidiu;
+
+ iinfo->i_unique = lvid_get_unique_id(sb);
+ mutex_lock(&sbi->s_alloc_mutex);
+ lvidiu = udf_sb_lvidiu(sbi);
if (S_ISDIR(mode))
le32_add_cpu(&lvidiu->numDirs, 1);
else
le32_add_cpu(&lvidiu->numFiles, 1);
- iinfo->i_unique = uniqueID = le64_to_cpu(lvhd->uniqueID);
- if (!(++uniqueID & 0x00000000FFFFFFFFUL))
- uniqueID += 16;
- lvhd->uniqueID = cpu_to_le64(uniqueID);
udf_updated_lvid(sb);
+ mutex_unlock(&sbi->s_alloc_mutex);
}
- mutex_unlock(&sbi->s_alloc_mutex);
inode_init_owner(inode, dir, mode);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index fc48f37..c6a2e78 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -31,7 +31,6 @@
#include "udfdecl.h"
#include <linux/mm.h>
-#include <linux/smp_lock.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
@@ -51,6 +50,7 @@ MODULE_LICENSE("GPL");
static mode_t udf_convert_permissions(struct fileEntry *);
static int udf_update_inode(struct inode *, int);
static void udf_fill_inode(struct inode *, struct buffer_head *);
+static int udf_sync_inode(struct inode *inode);
static int udf_alloc_i_data(struct inode *inode, size_t size);
static struct buffer_head *inode_getblk(struct inode *, sector_t, int *,
sector_t *, int *);
@@ -79,9 +79,7 @@ void udf_evict_inode(struct inode *inode)
want_delete = 1;
inode->i_size = 0;
udf_truncate(inode);
- lock_kernel();
udf_update_inode(inode, IS_SYNC(inode));
- unlock_kernel();
}
invalidate_inode_buffers(inode);
end_writeback(inode);
@@ -97,9 +95,7 @@ void udf_evict_inode(struct inode *inode)
kfree(iinfo->i_ext.i_data);
iinfo->i_ext.i_data = NULL;
if (want_delete) {
- lock_kernel();
udf_free_inode(inode);
- unlock_kernel();
}
}
@@ -302,10 +298,9 @@ static int udf_get_block(struct inode *inode, sector_t block,
err = -EIO;
new = 0;
bh = NULL;
-
- lock_kernel();
-
iinfo = UDF_I(inode);
+
+ down_write(&iinfo->i_data_sem);
if (block == iinfo->i_next_alloc_block + 1) {
iinfo->i_next_alloc_block++;
iinfo->i_next_alloc_goal++;
@@ -324,7 +319,7 @@ static int udf_get_block(struct inode *inode, sector_t block,
map_bh(bh_result, inode->i_sb, phys);
abort:
- unlock_kernel();
+ up_write(&iinfo->i_data_sem);
return err;
}
@@ -1022,16 +1017,16 @@ void udf_truncate(struct inode *inode)
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
return;
- lock_kernel();
iinfo = UDF_I(inode);
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
+ down_write(&iinfo->i_data_sem);
if (inode->i_sb->s_blocksize <
(udf_file_entry_alloc_offset(inode) +
inode->i_size)) {
udf_expand_file_adinicb(inode, inode->i_size, &err);
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
inode->i_size = iinfo->i_lenAlloc;
- unlock_kernel();
+ up_write(&iinfo->i_data_sem);
return;
} else
udf_truncate_extents(inode);
@@ -1042,10 +1037,13 @@ void udf_truncate(struct inode *inode)
offset - udf_file_entry_alloc_offset(inode));
iinfo->i_lenAlloc = inode->i_size;
}
+ up_write(&iinfo->i_data_sem);
} else {
block_truncate_page(inode->i_mapping, inode->i_size,
udf_get_block);
+ down_write(&iinfo->i_data_sem);
udf_truncate_extents(inode);
+ up_write(&iinfo->i_data_sem);
}
inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb);
@@ -1053,7 +1051,6 @@ void udf_truncate(struct inode *inode)
udf_sync_inode(inode);
else
mark_inode_dirty(inode);
- unlock_kernel();
}
static void __udf_read_inode(struct inode *inode)
@@ -1202,6 +1199,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
return;
}
+ read_lock(&sbi->s_cred_lock);
inode->i_uid = le32_to_cpu(fe->uid);
if (inode->i_uid == -1 ||
UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_IGNORE) ||
@@ -1214,13 +1212,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_SET))
inode->i_gid = UDF_SB(inode->i_sb)->s_gid;
- inode->i_nlink = le16_to_cpu(fe->fileLinkCount);
- if (!inode->i_nlink)
- inode->i_nlink = 1;
-
- inode->i_size = le64_to_cpu(fe->informationLength);
- iinfo->i_lenExtents = inode->i_size;
-
if (fe->icbTag.fileType != ICBTAG_FILE_TYPE_DIRECTORY &&
sbi->s_fmode != UDF_INVALID_MODE)
inode->i_mode = sbi->s_fmode;
@@ -1230,6 +1221,14 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
else
inode->i_mode = udf_convert_permissions(fe);
inode->i_mode &= ~sbi->s_umask;
+ read_unlock(&sbi->s_cred_lock);
+
+ inode->i_nlink = le16_to_cpu(fe->fileLinkCount);
+ if (!inode->i_nlink)
+ inode->i_nlink = 1;
+
+ inode->i_size = le64_to_cpu(fe->informationLength);
+ iinfo->i_lenExtents = inode->i_size;
if (iinfo->i_efe == 0) {
inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) <<
@@ -1373,16 +1372,10 @@ static mode_t udf_convert_permissions(struct fileEntry *fe)
int udf_write_inode(struct inode *inode, struct writeback_control *wbc)
{
- int ret;
-
- lock_kernel();
- ret = udf_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
- unlock_kernel();
-
- return ret;
+ return udf_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
}
-int udf_sync_inode(struct inode *inode)
+static int udf_sync_inode(struct inode *inode)
{
return udf_update_inode(inode, 1);
}
@@ -2048,7 +2041,7 @@ long udf_block_map(struct inode *inode, sector_t block)
struct extent_position epos = {};
int ret;
- lock_kernel();
+ down_read(&UDF_I(inode)->i_data_sem);
if (inode_bmap(inode, block, &epos, &eloc, &elen, &offset) ==
(EXT_RECORDED_ALLOCATED >> 30))
@@ -2056,7 +2049,7 @@ long udf_block_map(struct inode *inode, sector_t block)
else
ret = 0;
- unlock_kernel();
+ up_read(&UDF_I(inode)->i_data_sem);
brelse(epos.bh);
if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_VARCONV))
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 6d8dc02..2be0f9e 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -27,7 +27,6 @@
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
-#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/sched.h>
#include <linux/crc-itu-t.h>
@@ -228,10 +227,8 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
}
if ((cfi->fileCharacteristics & FID_FILE_CHAR_PARENT) &&
- isdotdot) {
- brelse(epos.bh);
- return fi;
- }
+ isdotdot)
+ goto out_ok;
if (!lfi)
continue;
@@ -263,7 +260,6 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
if (dentry->d_name.len > UDF_NAME_LEN - 2)
return ERR_PTR(-ENAMETOOLONG);
- lock_kernel();
#ifdef UDF_RECOVERY
/* temporary shorthand for specifying files by inode number */
if (!strncmp(dentry->d_name.name, ".B=", 3)) {
@@ -275,7 +271,6 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
};
inode = udf_iget(dir->i_sb, lb);
if (!inode) {
- unlock_kernel();
return ERR_PTR(-EACCES);
}
} else
@@ -291,11 +286,9 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
loc = lelb_to_cpu(cfi.icb.extLocation);
inode = udf_iget(dir->i_sb, &loc);
if (!inode) {
- unlock_kernel();
return ERR_PTR(-EACCES);
}
}
- unlock_kernel();
return d_splice_alias(inode, dentry);
}
@@ -476,15 +469,19 @@ add:
f_pos >> dir->i_sb->s_blocksize_bits, 1, err);
if (!fibh->ebh)
goto out_err;
+ /* Extents could have been merged, invalidate our position */
+ brelse(epos.bh);
+ epos.bh = NULL;
+ epos.block = dinfo->i_location;
+ epos.offset = udf_file_entry_alloc_offset(dir);
if (!fibh->soffset) {
- if (udf_next_aext(dir, &epos, &eloc, &elen, 1) ==
- (EXT_RECORDED_ALLOCATED >> 30)) {
- block = eloc.logicalBlockNum + ((elen - 1) >>
+ /* Find the freshly allocated block */
+ while (udf_next_aext(dir, &epos, &eloc, &elen, 1) ==
+ (EXT_RECORDED_ALLOCATED >> 30))
+ ;
+ block = eloc.logicalBlockNum + ((elen - 1) >>
dir->i_sb->s_blocksize_bits);
- } else
- block++;
-
brelse(fibh->sbh);
fibh->sbh = fibh->ebh;
fi = (struct fileIdentDesc *)(fibh->sbh->b_data);
@@ -562,10 +559,8 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
int err;
struct udf_inode_info *iinfo;
- lock_kernel();
inode = udf_new_inode(dir, mode, &err);
if (!inode) {
- unlock_kernel();
return err;
}
@@ -583,7 +578,6 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
inode->i_nlink--;
mark_inode_dirty(inode);
iput(inode);
- unlock_kernel();
return err;
}
cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
@@ -596,7 +590,6 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
if (fibh.sbh != fibh.ebh)
brelse(fibh.ebh);
brelse(fibh.sbh);
- unlock_kernel();
d_instantiate(dentry, inode);
return 0;
@@ -614,7 +607,6 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
if (!old_valid_dev(rdev))
return -EINVAL;
- lock_kernel();
err = -EIO;
inode = udf_new_inode(dir, mode, &err);
if (!inode)
@@ -627,7 +619,6 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
inode->i_nlink--;
mark_inode_dirty(inode);
iput(inode);
- unlock_kernel();
return err;
}
cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
@@ -646,7 +637,6 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
err = 0;
out:
- unlock_kernel();
return err;
}
@@ -659,7 +649,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
struct udf_inode_info *dinfo = UDF_I(dir);
struct udf_inode_info *iinfo;
- lock_kernel();
err = -EMLINK;
if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1)
goto out;
@@ -712,7 +701,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
err = 0;
out:
- unlock_kernel();
return err;
}
@@ -794,7 +782,6 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
struct kernel_lb_addr tloc;
retval = -ENOENT;
- lock_kernel();
fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
if (!fi)
goto out;
@@ -826,7 +813,6 @@ end_rmdir:
brelse(fibh.sbh);
out:
- unlock_kernel();
return retval;
}
@@ -840,7 +826,6 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
struct kernel_lb_addr tloc;
retval = -ENOENT;
- lock_kernel();
fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
if (!fi)
goto out;
@@ -870,7 +855,6 @@ end_unlink:
brelse(fibh.sbh);
out:
- unlock_kernel();
return retval;
}
@@ -890,21 +874,21 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
int block;
unsigned char *name = NULL;
int namelen;
- struct buffer_head *bh;
struct udf_inode_info *iinfo;
+ struct super_block *sb = dir->i_sb;
- lock_kernel();
inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO, &err);
if (!inode)
goto out;
+ iinfo = UDF_I(inode);
+ down_write(&iinfo->i_data_sem);
name = kmalloc(UDF_NAME_LEN, GFP_NOFS);
if (!name) {
err = -ENOMEM;
goto out_no_entry;
}
- iinfo = UDF_I(inode);
inode->i_data.a_ops = &udf_symlink_aops;
inode->i_op = &udf_symlink_inode_operations;
@@ -912,7 +896,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
struct kernel_lb_addr eloc;
uint32_t bsize;
- block = udf_new_block(inode->i_sb, inode,
+ block = udf_new_block(sb, inode,
iinfo->i_location.partitionReferenceNum,
iinfo->i_location.logicalBlockNum, &err);
if (!block)
@@ -923,17 +907,17 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
eloc.logicalBlockNum = block;
eloc.partitionReferenceNum =
iinfo->i_location.partitionReferenceNum;
- bsize = inode->i_sb->s_blocksize;
+ bsize = sb->s_blocksize;
iinfo->i_lenExtents = bsize;
udf_add_aext(inode, &epos, &eloc, bsize, 0);
brelse(epos.bh);
- block = udf_get_pblock(inode->i_sb, block,
+ block = udf_get_pblock(sb, block,
iinfo->i_location.partitionReferenceNum,
0);
- epos.bh = udf_tgetblk(inode->i_sb, block);
+ epos.bh = udf_tgetblk(sb, block);
lock_buffer(epos.bh);
- memset(epos.bh->b_data, 0x00, inode->i_sb->s_blocksize);
+ memset(epos.bh->b_data, 0x00, bsize);
set_buffer_uptodate(epos.bh);
unlock_buffer(epos.bh);
mark_buffer_dirty_inode(epos.bh, inode);
@@ -941,7 +925,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
} else
ea = iinfo->i_ext.i_data + iinfo->i_lenEAttr;
- eoffset = inode->i_sb->s_blocksize - udf_ext0_offset(inode);
+ eoffset = sb->s_blocksize - udf_ext0_offset(inode);
pc = (struct pathComponent *)ea;
if (*symname == '/') {
@@ -981,7 +965,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
}
if (pc->componentType == 5) {
- namelen = udf_put_filename(inode->i_sb, compstart, name,
+ namelen = udf_put_filename(sb, compstart, name,
symname - compstart);
if (!namelen)
goto out_no_entry;
@@ -1015,27 +999,16 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
if (!fi)
goto out_no_entry;
- cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
+ cfi.icb.extLength = cpu_to_le32(sb->s_blocksize);
cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location);
- bh = UDF_SB(inode->i_sb)->s_lvid_bh;
- if (bh) {
- struct logicalVolIntegrityDesc *lvid =
- (struct logicalVolIntegrityDesc *)bh->b_data;
- struct logicalVolHeaderDesc *lvhd;
- uint64_t uniqueID;
- lvhd = (struct logicalVolHeaderDesc *)
- lvid->logicalVolContentsUse;
- uniqueID = le64_to_cpu(lvhd->uniqueID);
+ if (UDF_SB(inode->i_sb)->s_lvid_bh) {
*(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
- cpu_to_le32(uniqueID & 0x00000000FFFFFFFFUL);
- if (!(++uniqueID & 0x00000000FFFFFFFFUL))
- uniqueID += 16;
- lvhd->uniqueID = cpu_to_le64(uniqueID);
- mark_buffer_dirty(bh);
+ cpu_to_le32(lvid_get_unique_id(sb));
}
udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
mark_inode_dirty(dir);
+ up_write(&iinfo->i_data_sem);
if (fibh.sbh != fibh.ebh)
brelse(fibh.ebh);
brelse(fibh.sbh);
@@ -1044,10 +1017,10 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
out:
kfree(name);
- unlock_kernel();
return err;
out_no_entry:
+ up_write(&iinfo->i_data_sem);
inode_dec_link_count(inode);
iput(inode);
goto out;
@@ -1060,36 +1033,20 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
struct udf_fileident_bh fibh;
struct fileIdentDesc cfi, *fi;
int err;
- struct buffer_head *bh;
- lock_kernel();
if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) {
- unlock_kernel();
return -EMLINK;
}
fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
if (!fi) {
- unlock_kernel();
return err;
}
cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
cfi.icb.extLocation = cpu_to_lelb(UDF_I(inode)->i_location);
- bh = UDF_SB(inode->i_sb)->s_lvid_bh;
- if (bh) {
- struct logicalVolIntegrityDesc *lvid =
- (struct logicalVolIntegrityDesc *)bh->b_data;
- struct logicalVolHeaderDesc *lvhd;
- uint64_t uniqueID;
- lvhd = (struct logicalVolHeaderDesc *)
- (lvid->logicalVolContentsUse);
- uniqueID = le64_to_cpu(lvhd->uniqueID);
+ if (UDF_SB(inode->i_sb)->s_lvid_bh) {
*(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
- cpu_to_le32(uniqueID & 0x00000000FFFFFFFFUL);
- if (!(++uniqueID & 0x00000000FFFFFFFFUL))
- uniqueID += 16;
- lvhd->uniqueID = cpu_to_le64(uniqueID);
- mark_buffer_dirty(bh);
+ cpu_to_le32(lvid_get_unique_id(inode->i_sb));
}
udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
@@ -1103,7 +1060,6 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
mark_inode_dirty(inode);
ihold(inode);
d_instantiate(dentry, inode);
- unlock_kernel();
return 0;
}
@@ -1124,7 +1080,6 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
struct kernel_lb_addr tloc;
struct udf_inode_info *old_iinfo = UDF_I(old_inode);
- lock_kernel();
ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
if (ofi) {
if (ofibh.sbh != ofibh.ebh)
@@ -1248,7 +1203,6 @@ end_rename:
brelse(nfibh.ebh);
brelse(nfibh.sbh);
}
- unlock_kernel();
return retval;
}
@@ -1261,7 +1215,6 @@ static struct dentry *udf_get_parent(struct dentry *child)
struct fileIdentDesc cfi;
struct udf_fileident_bh fibh;
- lock_kernel();
if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi))
goto out_unlock;
@@ -1273,11 +1226,9 @@ static struct dentry *udf_get_parent(struct dentry *child)
inode = udf_iget(child->d_inode->i_sb, &tloc);
if (!inode)
goto out_unlock;
- unlock_kernel();
return d_obtain_alias(inode);
out_unlock:
- unlock_kernel();
return ERR_PTR(-EACCES);
}
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index 745eb20..a71090e 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -25,6 +25,7 @@
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
+#include <linux/mutex.h>
uint32_t udf_get_pblock(struct super_block *sb, uint32_t block,
uint16_t partition, uint32_t offset)
@@ -159,7 +160,9 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
struct udf_sb_info *sbi = UDF_SB(sb);
u16 reallocationTableLen;
struct buffer_head *bh;
+ int ret = 0;
+ mutex_lock(&sbi->s_alloc_mutex);
for (i = 0; i < sbi->s_partitions; i++) {
struct udf_part_map *map = &sbi->s_partmaps[i];
if (old_block > map->s_partition_root &&
@@ -175,8 +178,10 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
break;
}
- if (!st)
- return 1;
+ if (!st) {
+ ret = 1;
+ goto out;
+ }
reallocationTableLen =
le16_to_cpu(st->reallocationTableLen);
@@ -207,14 +212,16 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
((old_block -
map->s_partition_root) &
(sdata->s_packet_len - 1));
- return 0;
+ ret = 0;
+ goto out;
} else if (origLoc == packet) {
*new_block = le32_to_cpu(
entry->mappedLocation) +
((old_block -
map->s_partition_root) &
(sdata->s_packet_len - 1));
- return 0;
+ ret = 0;
+ goto out;
} else if (origLoc > packet)
break;
}
@@ -251,20 +258,24 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
st->mapEntry[k].mappedLocation) +
((old_block - map->s_partition_root) &
(sdata->s_packet_len - 1));
- return 0;
+ ret = 0;
+ goto out;
}
- return 1;
+ ret = 1;
+ goto out;
} /* if old_block */
}
if (i == sbi->s_partitions) {
/* outside of partitions */
/* for now, fail =) */
- return 1;
+ ret = 1;
}
- return 0;
+out:
+ mutex_unlock(&sbi->s_alloc_mutex);
+ return ret;
}
static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block,
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 76f3d6d..7b27b06 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -48,7 +48,6 @@
#include <linux/stat.h>
#include <linux/cdrom.h>
#include <linux/nls.h>
-#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/vfs.h>
#include <linux/vmalloc.h>
@@ -107,17 +106,16 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
}
/* UDF filesystem type */
-static int udf_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data,
- struct vfsmount *mnt)
+static struct dentry *udf_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super, mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, udf_fill_super);
}
static struct file_system_type udf_fstype = {
.owner = THIS_MODULE,
.name = "udf",
- .get_sb = udf_get_sb,
+ .mount = udf_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
@@ -136,15 +134,23 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
ei->i_next_alloc_block = 0;
ei->i_next_alloc_goal = 0;
ei->i_strat4096 = 0;
+ init_rwsem(&ei->i_data_sem);
return &ei->vfs_inode;
}
-static void udf_destroy_inode(struct inode *inode)
+static void udf_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(udf_inode_cachep, UDF_I(inode));
}
+static void udf_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, udf_i_callback);
+}
+
static void init_once(void *foo)
{
struct udf_inode_info *ei = (struct udf_inode_info *)foo;
@@ -568,13 +574,14 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
if (!udf_parse_options(options, &uopt, true))
return -EINVAL;
- lock_kernel();
+ write_lock(&sbi->s_cred_lock);
sbi->s_flags = uopt.flags;
sbi->s_uid = uopt.uid;
sbi->s_gid = uopt.gid;
sbi->s_umask = uopt.umask;
sbi->s_fmode = uopt.fmode;
sbi->s_dmode = uopt.dmode;
+ write_unlock(&sbi->s_cred_lock);
if (sbi->s_lvid_bh) {
int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev);
@@ -591,7 +598,6 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
udf_open_lvid(sb);
out_unlock:
- unlock_kernel();
return error;
}
@@ -960,9 +966,9 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
(sizeof(struct buffer_head *) * nr_groups);
if (size <= PAGE_SIZE)
- bitmap = kmalloc(size, GFP_KERNEL);
+ bitmap = kzalloc(size, GFP_KERNEL);
else
- bitmap = vmalloc(size); /* TODO: get rid of vmalloc */
+ bitmap = vzalloc(size); /* TODO: get rid of vzalloc */
if (bitmap == NULL) {
udf_error(sb, __func__,
@@ -971,7 +977,6 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
return NULL;
}
- memset(bitmap, 0x00, size);
bitmap->s_block_bitmap = (struct buffer_head **)(bitmap + 1);
bitmap->s_nr_groups = nr_groups;
return bitmap;
@@ -1775,6 +1780,8 @@ static void udf_open_lvid(struct super_block *sb)
if (!bh)
return;
+
+ mutex_lock(&sbi->s_alloc_mutex);
lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
lvidiu = udf_sb_lvidiu(sbi);
@@ -1791,6 +1798,7 @@ static void udf_open_lvid(struct super_block *sb)
lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
mark_buffer_dirty(bh);
sbi->s_lvid_dirty = 0;
+ mutex_unlock(&sbi->s_alloc_mutex);
}
static void udf_close_lvid(struct super_block *sb)
@@ -1803,6 +1811,7 @@ static void udf_close_lvid(struct super_block *sb)
if (!bh)
return;
+ mutex_lock(&sbi->s_alloc_mutex);
lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
lvidiu = udf_sb_lvidiu(sbi);
lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
@@ -1823,6 +1832,34 @@ static void udf_close_lvid(struct super_block *sb)
lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
mark_buffer_dirty(bh);
sbi->s_lvid_dirty = 0;
+ mutex_unlock(&sbi->s_alloc_mutex);
+}
+
+u64 lvid_get_unique_id(struct super_block *sb)
+{
+ struct buffer_head *bh;
+ struct udf_sb_info *sbi = UDF_SB(sb);
+ struct logicalVolIntegrityDesc *lvid;
+ struct logicalVolHeaderDesc *lvhd;
+ u64 uniqueID;
+ u64 ret;
+
+ bh = sbi->s_lvid_bh;
+ if (!bh)
+ return 0;
+
+ lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
+ lvhd = (struct logicalVolHeaderDesc *)lvid->logicalVolContentsUse;
+
+ mutex_lock(&sbi->s_alloc_mutex);
+ ret = uniqueID = le64_to_cpu(lvhd->uniqueID);
+ if (!(++uniqueID & 0xFFFFFFFF))
+ uniqueID += 16;
+ lvhd->uniqueID = cpu_to_le64(uniqueID);
+ mutex_unlock(&sbi->s_alloc_mutex);
+ mark_buffer_dirty(bh);
+
+ return ret;
}
static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
@@ -1880,8 +1917,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
struct kernel_lb_addr rootdir, fileset;
struct udf_sb_info *sbi;
- lock_kernel();
-
uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT);
uopt.uid = -1;
uopt.gid = -1;
@@ -1890,10 +1925,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
uopt.dmode = UDF_INVALID_MODE;
sbi = kzalloc(sizeof(struct udf_sb_info), GFP_KERNEL);
- if (!sbi) {
- unlock_kernel();
+ if (!sbi)
return -ENOMEM;
- }
sb->s_fs_info = sbi;
@@ -1930,6 +1963,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
sbi->s_fmode = uopt.fmode;
sbi->s_dmode = uopt.dmode;
sbi->s_nls_map = uopt.nls_map;
+ rwlock_init(&sbi->s_cred_lock);
if (uopt.session == 0xFFFFFFFF)
sbi->s_session = udf_get_last_session(sb);
@@ -2039,7 +2073,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
goto error_out;
}
sb->s_maxbytes = MAX_LFS_FILESIZE;
- unlock_kernel();
return 0;
error_out:
@@ -2060,7 +2093,6 @@ error_out:
kfree(sbi);
sb->s_fs_info = NULL;
- unlock_kernel();
return -EINVAL;
}
@@ -2099,8 +2131,6 @@ static void udf_put_super(struct super_block *sb)
sbi = UDF_SB(sb);
- lock_kernel();
-
if (sbi->s_vat_inode)
iput(sbi->s_vat_inode);
if (sbi->s_partitions)
@@ -2116,8 +2146,6 @@ static void udf_put_super(struct super_block *sb)
kfree(sbi->s_partmaps);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
-
- unlock_kernel();
}
static int udf_sync_fs(struct super_block *sb, int wait)
@@ -2180,8 +2208,6 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
uint16_t ident;
struct spaceBitmapDesc *bm;
- lock_kernel();
-
loc.logicalBlockNum = bitmap->s_extPosition;
loc.partitionReferenceNum = UDF_SB(sb)->s_partition;
bh = udf_read_ptagged(sb, &loc, 0, &ident);
@@ -2218,10 +2244,7 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
}
}
brelse(bh);
-
out:
- unlock_kernel();
-
return accum;
}
@@ -2234,8 +2257,7 @@ static unsigned int udf_count_free_table(struct super_block *sb,
int8_t etype;
struct extent_position epos;
- lock_kernel();
-
+ mutex_lock(&UDF_SB(sb)->s_alloc_mutex);
epos.block = UDF_I(table)->i_location;
epos.offset = sizeof(struct unallocSpaceEntry);
epos.bh = NULL;
@@ -2244,8 +2266,7 @@ static unsigned int udf_count_free_table(struct super_block *sb,
accum += (elen >> table->i_sb->s_blocksize_bits);
brelse(epos.bh);
-
- unlock_kernel();
+ mutex_unlock(&UDF_SB(sb)->s_alloc_mutex);
return accum;
}
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index 1606478..b1d4488 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -27,7 +27,6 @@
#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/pagemap.h>
-#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include "udf_i.h"
@@ -78,13 +77,16 @@ static int udf_symlink_filler(struct file *file, struct page *page)
int err = -EIO;
unsigned char *p = kmap(page);
struct udf_inode_info *iinfo;
+ uint32_t pos;
- lock_kernel();
iinfo = UDF_I(inode);
+ pos = udf_block_map(inode, 0);
+
+ down_read(&iinfo->i_data_sem);
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
symlink = iinfo->i_ext.i_data + iinfo->i_lenEAttr;
} else {
- bh = sb_bread(inode->i_sb, udf_block_map(inode, 0));
+ bh = sb_bread(inode->i_sb, pos);
if (!bh)
goto out;
@@ -95,14 +97,14 @@ static int udf_symlink_filler(struct file *file, struct page *page)
udf_pc_to_char(inode->i_sb, symlink, inode->i_size, p);
brelse(bh);
- unlock_kernel();
+ up_read(&iinfo->i_data_sem);
SetPageUptodate(page);
kunmap(page);
unlock_page(page);
return 0;
out:
- unlock_kernel();
+ up_read(&iinfo->i_data_sem);
SetPageError(page);
kunmap(page);
unlock_page(page);
diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h
index e58d1de..d1bd31e 100644
--- a/fs/udf/udf_i.h
+++ b/fs/udf/udf_i.h
@@ -1,6 +1,18 @@
#ifndef _UDF_I_H
#define _UDF_I_H
+/*
+ * The i_data_sem and i_mutex serve for protection of allocation information
+ * of a regular files and symlinks. This includes all extents belonging to
+ * the file/symlink, a fact whether data are in-inode or in external data
+ * blocks, preallocation, goal block information... When extents are read,
+ * i_mutex or i_data_sem must be held (for reading is enough in case of
+ * i_data_sem). When extents are changed, i_data_sem must be held for writing
+ * and also i_mutex must be held.
+ *
+ * For directories i_mutex is used for all the necessary protection.
+ */
+
struct udf_inode_info {
struct timespec i_crtime;
/* Physical address of inode */
@@ -21,6 +33,7 @@ struct udf_inode_info {
struct long_ad *i_lad;
__u8 *i_data;
} i_ext;
+ struct rw_semaphore i_data_sem;
struct inode vfs_inode;
};
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index d113b72..4858c19 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -2,6 +2,7 @@
#define __LINUX_UDF_SB_H
#include <linux/mutex.h>
+#include <linux/bitops.h>
/* Since UDF 2.01 is ISO 13346 based... */
#define UDF_SUPER_MAGIC 0x15013346
@@ -128,6 +129,8 @@ struct udf_sb_info {
uid_t s_uid;
mode_t s_fmode;
mode_t s_dmode;
+ /* Lock protecting consistency of above permission settings */
+ rwlock_t s_cred_lock;
/* Root Info */
struct timespec s_record_time;
@@ -139,7 +142,7 @@ struct udf_sb_info {
__u16 s_udfrev;
/* Miscellaneous flags */
- __u32 s_flags;
+ unsigned long s_flags;
/* Encoding info */
struct nls_table *s_nls_map;
@@ -161,8 +164,19 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi);
int udf_compute_nr_groups(struct super_block *sb, u32 partition);
-#define UDF_QUERY_FLAG(X,Y) ( UDF_SB(X)->s_flags & ( 1 << (Y) ) )
-#define UDF_SET_FLAG(X,Y) ( UDF_SB(X)->s_flags |= ( 1 << (Y) ) )
-#define UDF_CLEAR_FLAG(X,Y) ( UDF_SB(X)->s_flags &= ~( 1 << (Y) ) )
+static inline int UDF_QUERY_FLAG(struct super_block *sb, int flag)
+{
+ return test_bit(flag, &UDF_SB(sb)->s_flags);
+}
+
+static inline void UDF_SET_FLAG(struct super_block *sb, int flag)
+{
+ set_bit(flag, &UDF_SB(sb)->s_flags);
+}
+
+static inline void UDF_CLEAR_FLAG(struct super_block *sb, int flag)
+{
+ clear_bit(flag, &UDF_SB(sb)->s_flags);
+}
#endif /* __LINUX_UDF_SB_H */
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 6995ab1..eba4820 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -111,6 +111,8 @@ struct extent_position {
};
/* super.c */
+
+__attribute__((format(printf, 3, 4)))
extern void udf_warning(struct super_block *, const char *, const char *, ...);
static inline void udf_updated_lvid(struct super_block *sb)
{
@@ -123,6 +125,7 @@ static inline void udf_updated_lvid(struct super_block *sb)
sb->s_dirt = 1;
UDF_SB(sb)->s_lvid_dirty = 1;
}
+extern u64 lvid_get_unique_id(struct super_block *sb);
/* namei.c */
extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
@@ -133,7 +136,6 @@ extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
extern long udf_ioctl(struct file *, unsigned int, unsigned long);
/* inode.c */
extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *);
-extern int udf_sync_inode(struct inode *);
extern void udf_expand_file_adinicb(struct inode *, int, int *);
extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *);
extern struct buffer_head *udf_bread(struct inode *, int, int, int *);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 6b9be90..2c61ac5 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1412,11 +1412,18 @@ static struct inode *ufs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void ufs_destroy_inode(struct inode *inode)
+static void ufs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(ufs_inode_cachep, UFS_I(inode));
}
+static void ufs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, ufs_i_callback);
+}
+
static void init_once(void *foo)
{
struct ufs_inode_info *ei = (struct ufs_inode_info *) foo;
@@ -1454,16 +1461,16 @@ static const struct super_operations ufs_super_ops = {
.show_options = ufs_show_options,
};
-static int ufs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ufs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, ufs_fill_super, mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, ufs_fill_super);
}
static struct file_system_type ufs_fs_type = {
.owner = THIS_MODULE,
.name = "ufs",
- .get_sb = ufs_get_sb,
+ .mount = ufs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
deleted file mode 100644
index 4dfc7c3..0000000
--- a/fs/xfs/linux-2.6/sv.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SUPPORT_SV_H__
-#define __XFS_SUPPORT_SV_H__
-
-#include <linux/wait.h>
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-
-/*
- * Synchronisation variables.
- *
- * (Parameters "pri", "svf" and "rts" are not implemented)
- */
-
-typedef struct sv_s {
- wait_queue_head_t waiters;
-} sv_t;
-
-static inline void _sv_wait(sv_t *sv, spinlock_t *lock)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- add_wait_queue_exclusive(&sv->waiters, &wait);
- __set_current_state(TASK_UNINTERRUPTIBLE);
- spin_unlock(lock);
-
- schedule();
-
- remove_wait_queue(&sv->waiters, &wait);
-}
-
-#define sv_init(sv,flag,name) \
- init_waitqueue_head(&(sv)->waiters)
-#define sv_destroy(sv) \
- /*NOTHING*/
-#define sv_wait(sv, pri, lock, s) \
- _sv_wait(sv, lock)
-#define sv_signal(sv) \
- wake_up(&(sv)->waiters)
-#define sv_broadcast(sv) \
- wake_up_all(&(sv)->waiters)
-
-#endif /* __XFS_SUPPORT_SV_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index b277186..39f4f80 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -219,12 +219,13 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
}
int
-xfs_check_acl(struct inode *inode, int mask)
+xfs_check_acl(struct inode *inode, int mask, unsigned int flags)
{
- struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_inode *ip;
struct posix_acl *acl;
int error = -EAGAIN;
+ ip = XFS_I(inode);
trace_xfs_check_acl(ip);
/*
@@ -234,6 +235,12 @@ xfs_check_acl(struct inode *inode, int mask)
if (!XFS_IFORK_Q(ip))
return -EAGAIN;
+ if (flags & IPERM_FLAG_RCU) {
+ if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+ return -ECHILD;
+ return -EAGAIN;
+ }
+
acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c9af48f..ec7bbb5 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -38,15 +38,6 @@
#include <linux/pagevec.h>
#include <linux/writeback.h>
-/*
- * Types of I/O for bmap clustering and I/O completion tracking.
- */
-enum {
- IO_READ, /* mapping for a read */
- IO_DELAY, /* mapping covers delalloc region */
- IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */
- IO_NEW /* just allocated */
-};
/*
* Prime number of hash buckets since address is used as the key.
@@ -182,9 +173,6 @@ xfs_setfilesize(
xfs_inode_t *ip = XFS_I(ioend->io_inode);
xfs_fsize_t isize;
- ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
- ASSERT(ioend->io_type != IO_READ);
-
if (unlikely(ioend->io_error))
return 0;
@@ -244,10 +232,8 @@ xfs_end_io(
* We might have to update the on-disk file size after extending
* writes.
*/
- if (ioend->io_type != IO_READ) {
- error = xfs_setfilesize(ioend);
- ASSERT(!error || error == EAGAIN);
- }
+ error = xfs_setfilesize(ioend);
+ ASSERT(!error || error == EAGAIN);
/*
* If we didn't complete processing of the ioend, requeue it to the
@@ -318,14 +304,63 @@ STATIC int
xfs_map_blocks(
struct inode *inode,
loff_t offset,
- ssize_t count,
struct xfs_bmbt_irec *imap,
- int flags)
+ int type,
+ int nonblocking)
{
- int nmaps = 1;
- int new = 0;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ ssize_t count = 1 << inode->i_blkbits;
+ xfs_fileoff_t offset_fsb, end_fsb;
+ int error = 0;
+ int bmapi_flags = XFS_BMAPI_ENTIRE;
+ int nimaps = 1;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -XFS_ERROR(EIO);
+
+ if (type == IO_UNWRITTEN)
+ bmapi_flags |= XFS_BMAPI_IGSTATE;
+
+ if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
+ if (nonblocking)
+ return -XFS_ERROR(EAGAIN);
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+ }
+
+ ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
+ (ip->i_df.if_flags & XFS_IFEXTENTS));
+ ASSERT(offset <= mp->m_maxioffset);
+
+ if (offset + count > mp->m_maxioffset)
+ count = mp->m_maxioffset - offset;
+ end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
+ offset_fsb = XFS_B_TO_FSBT(mp, offset);
+ error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
+ bmapi_flags, NULL, 0, imap, &nimaps, NULL);
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
- return -xfs_iomap(XFS_I(inode), offset, count, flags, imap, &nmaps, &new);
+ if (error)
+ return -XFS_ERROR(error);
+
+ if (type == IO_DELALLOC &&
+ (!nimaps || isnullstartblock(imap->br_startblock))) {
+ error = xfs_iomap_write_allocate(ip, offset, count, imap);
+ if (!error)
+ trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
+ return -XFS_ERROR(error);
+ }
+
+#ifdef DEBUG
+ if (type == IO_UNWRITTEN) {
+ ASSERT(nimaps);
+ ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+ ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+ }
+#endif
+ if (nimaps)
+ trace_xfs_map_blocks_found(ip, offset, count, type, imap);
+ return 0;
}
STATIC int
@@ -380,26 +415,18 @@ xfs_submit_ioend_bio(
submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
WRITE_SYNC_PLUG : WRITE, bio);
- ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
- bio_put(bio);
}
STATIC struct bio *
xfs_alloc_ioend_bio(
struct buffer_head *bh)
{
- struct bio *bio;
int nvecs = bio_get_nr_vecs(bh->b_bdev);
-
- do {
- bio = bio_alloc(GFP_NOIO, nvecs);
- nvecs >>= 1;
- } while (!bio);
+ struct bio *bio = bio_alloc(GFP_NOIO, nvecs);
ASSERT(bio->bi_private == NULL);
bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
- bio_get(bio);
return bio;
}
@@ -470,9 +497,8 @@ xfs_submit_ioend(
/* Pass 1 - start writeback */
do {
next = ioend->io_list;
- for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+ for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
xfs_start_buffer_writeback(bh);
- }
} while ((ioend = next) != NULL);
/* Pass 2 - submit I/O */
@@ -600,117 +626,13 @@ xfs_map_at_offset(
ASSERT(imap->br_startblock != HOLESTARTBLOCK);
ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
- lock_buffer(bh);
xfs_map_buffer(inode, bh, imap, offset);
- bh->b_bdev = xfs_find_bdev_for_inode(inode);
set_buffer_mapped(bh);
clear_buffer_delay(bh);
clear_buffer_unwritten(bh);
}
/*
- * Look for a page at index that is suitable for clustering.
- */
-STATIC unsigned int
-xfs_probe_page(
- struct page *page,
- unsigned int pg_offset)
-{
- struct buffer_head *bh, *head;
- int ret = 0;
-
- if (PageWriteback(page))
- return 0;
- if (!PageDirty(page))
- return 0;
- if (!page->mapping)
- return 0;
- if (!page_has_buffers(page))
- return 0;
-
- bh = head = page_buffers(page);
- do {
- if (!buffer_uptodate(bh))
- break;
- if (!buffer_mapped(bh))
- break;
- ret += bh->b_size;
- if (ret >= pg_offset)
- break;
- } while ((bh = bh->b_this_page) != head);
-
- return ret;
-}
-
-STATIC size_t
-xfs_probe_cluster(
- struct inode *inode,
- struct page *startpage,
- struct buffer_head *bh,
- struct buffer_head *head)
-{
- struct pagevec pvec;
- pgoff_t tindex, tlast, tloff;
- size_t total = 0;
- int done = 0, i;
-
- /* First sum forwards in this page */
- do {
- if (!buffer_uptodate(bh) || !buffer_mapped(bh))
- return total;
- total += bh->b_size;
- } while ((bh = bh->b_this_page) != head);
-
- /* if we reached the end of the page, sum forwards in following pages */
- tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
- tindex = startpage->index + 1;
-
- /* Prune this back to avoid pathological behavior */
- tloff = min(tlast, startpage->index + 64);
-
- pagevec_init(&pvec, 0);
- while (!done && tindex <= tloff) {
- unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
-
- if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
- break;
-
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
- size_t pg_offset, pg_len = 0;
-
- if (tindex == tlast) {
- pg_offset =
- i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
- if (!pg_offset) {
- done = 1;
- break;
- }
- } else
- pg_offset = PAGE_CACHE_SIZE;
-
- if (page->index == tindex && trylock_page(page)) {
- pg_len = xfs_probe_page(page, pg_offset);
- unlock_page(page);
- }
-
- if (!pg_len) {
- done = 1;
- break;
- }
-
- total += pg_len;
- tindex++;
- }
-
- pagevec_release(&pvec);
- cond_resched();
- }
-
- return total;
-}
-
-/*
* Test if a given page is suitable for writing as part of an unwritten
* or delayed allocate extent.
*/
@@ -731,9 +653,9 @@ xfs_is_delayed_page(
if (buffer_unwritten(bh))
acceptable = (type == IO_UNWRITTEN);
else if (buffer_delay(bh))
- acceptable = (type == IO_DELAY);
+ acceptable = (type == IO_DELALLOC);
else if (buffer_dirty(bh) && buffer_mapped(bh))
- acceptable = (type == IO_NEW);
+ acceptable = (type == IO_OVERWRITE);
else
break;
} while ((bh = bh->b_this_page) != head);
@@ -758,8 +680,7 @@ xfs_convert_page(
loff_t tindex,
struct xfs_bmbt_irec *imap,
xfs_ioend_t **ioendp,
- struct writeback_control *wbc,
- int all_bh)
+ struct writeback_control *wbc)
{
struct buffer_head *bh, *head;
xfs_off_t end_offset;
@@ -814,37 +735,30 @@ xfs_convert_page(
continue;
}
- if (buffer_unwritten(bh) || buffer_delay(bh)) {
+ if (buffer_unwritten(bh) || buffer_delay(bh) ||
+ buffer_mapped(bh)) {
if (buffer_unwritten(bh))
type = IO_UNWRITTEN;
+ else if (buffer_delay(bh))
+ type = IO_DELALLOC;
else
- type = IO_DELAY;
+ type = IO_OVERWRITE;
if (!xfs_imap_valid(inode, imap, offset)) {
done = 1;
continue;
}
- ASSERT(imap->br_startblock != HOLESTARTBLOCK);
- ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-
- xfs_map_at_offset(inode, bh, imap, offset);
+ lock_buffer(bh);
+ if (type != IO_OVERWRITE)
+ xfs_map_at_offset(inode, bh, imap, offset);
xfs_add_to_ioend(inode, bh, offset, type,
ioendp, done);
page_dirty--;
count++;
} else {
- type = IO_NEW;
- if (buffer_mapped(bh) && all_bh) {
- lock_buffer(bh);
- xfs_add_to_ioend(inode, bh, offset,
- type, ioendp, done);
- count++;
- page_dirty--;
- } else {
- done = 1;
- }
+ done = 1;
}
} while (offset += len, (bh = bh->b_this_page) != head);
@@ -876,7 +790,6 @@ xfs_cluster_write(
struct xfs_bmbt_irec *imap,
xfs_ioend_t **ioendp,
struct writeback_control *wbc,
- int all_bh,
pgoff_t tlast)
{
struct pagevec pvec;
@@ -891,7 +804,7 @@ xfs_cluster_write(
for (i = 0; i < pagevec_count(&pvec); i++) {
done = xfs_convert_page(inode, pvec.pages[i], tindex++,
- imap, ioendp, wbc, all_bh);
+ imap, ioendp, wbc);
if (done)
break;
}
@@ -934,9 +847,8 @@ xfs_aops_discard_page(
struct xfs_inode *ip = XFS_I(inode);
struct buffer_head *bh, *head;
loff_t offset = page_offset(page);
- ssize_t len = 1 << inode->i_blkbits;
- if (!xfs_is_delayed_page(page, IO_DELAY))
+ if (!xfs_is_delayed_page(page, IO_DELALLOC))
goto out_invalidate;
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -949,58 +861,14 @@ xfs_aops_discard_page(
xfs_ilock(ip, XFS_ILOCK_EXCL);
bh = head = page_buffers(page);
do {
- int done;
- xfs_fileoff_t offset_fsb;
- xfs_bmbt_irec_t imap;
- int nimaps = 1;
int error;
- xfs_fsblock_t firstblock;
- xfs_bmap_free_t flist;
+ xfs_fileoff_t start_fsb;
if (!buffer_delay(bh))
goto next_buffer;
- offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
-
- /*
- * Map the range first and check that it is a delalloc extent
- * before trying to unmap the range. Otherwise we will be
- * trying to remove a real extent (which requires a
- * transaction) or a hole, which is probably a bad idea...
- */
- error = xfs_bmapi(NULL, ip, offset_fsb, 1,
- XFS_BMAPI_ENTIRE, NULL, 0, &imap,
- &nimaps, NULL);
-
- if (error) {
- /* something screwed, just bail */
- if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
- "page discard failed delalloc mapping lookup.");
- }
- break;
- }
- if (!nimaps) {
- /* nothing there */
- goto next_buffer;
- }
- if (imap.br_startblock != DELAYSTARTBLOCK) {
- /* been converted, ignore */
- goto next_buffer;
- }
- WARN_ON(imap.br_blockcount == 0);
-
- /*
- * Note: while we initialise the firstblock/flist pair, they
- * should never be used because blocks should never be
- * allocated or freed for a delalloc extent and hence we need
- * don't cancel or finish them after the xfs_bunmapi() call.
- */
- xfs_bmap_init(&flist, &firstblock);
- error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
- &flist, &done);
-
- ASSERT(!flist.xbf_count && !flist.xbf_first);
+ start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+ error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
if (error) {
/* something screwed, just bail */
if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
@@ -1010,7 +878,7 @@ xfs_aops_discard_page(
break;
}
next_buffer:
- offset += len;
+ offset += 1 << inode->i_blkbits;
} while ((bh = bh->b_this_page) != head);
@@ -1047,10 +915,10 @@ xfs_vm_writepage(
unsigned int type;
__uint64_t end_offset;
pgoff_t end_index, last_index;
- ssize_t size, len;
- int flags, err, imap_valid = 0, uptodate = 1;
+ ssize_t len;
+ int err, imap_valid = 0, uptodate = 1;
int count = 0;
- int all_bh = 0;
+ int nonblocking = 0;
trace_xfs_writepage(inode, page, 0);
@@ -1101,109 +969,78 @@ xfs_vm_writepage(
bh = head = page_buffers(page);
offset = page_offset(page);
- flags = BMAPI_READ;
- type = IO_NEW;
+ type = IO_OVERWRITE;
+
+ if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
+ nonblocking = 1;
do {
+ int new_ioend = 0;
+
if (offset >= end_offset)
break;
if (!buffer_uptodate(bh))
uptodate = 0;
/*
- * A hole may still be marked uptodate because discard_buffer
- * leaves the flag set.
+ * set_page_dirty dirties all buffers in a page, independent
+ * of their state. The dirty state however is entirely
+ * meaningless for holes (!mapped && uptodate), so skip
+ * buffers covering holes here.
*/
if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
- ASSERT(!buffer_dirty(bh));
imap_valid = 0;
continue;
}
- if (imap_valid)
- imap_valid = xfs_imap_valid(inode, &imap, offset);
-
- if (buffer_unwritten(bh) || buffer_delay(bh)) {
- int new_ioend = 0;
-
- /*
- * Make sure we don't use a read-only iomap
- */
- if (flags == BMAPI_READ)
- imap_valid = 0;
-
- if (buffer_unwritten(bh)) {
+ if (buffer_unwritten(bh)) {
+ if (type != IO_UNWRITTEN) {
type = IO_UNWRITTEN;
- flags = BMAPI_WRITE | BMAPI_IGNSTATE;
- } else if (buffer_delay(bh)) {
- type = IO_DELAY;
- flags = BMAPI_ALLOCATE;
-
- if (wbc->sync_mode == WB_SYNC_NONE)
- flags |= BMAPI_TRYLOCK;
- }
-
- if (!imap_valid) {
- /*
- * If we didn't have a valid mapping then we
- * need to ensure that we put the new mapping
- * in a new ioend structure. This needs to be
- * done to ensure that the ioends correctly
- * reflect the block mappings at io completion
- * for unwritten extent conversion.
- */
- new_ioend = 1;
- err = xfs_map_blocks(inode, offset, len,
- &imap, flags);
- if (err)
- goto error;
- imap_valid = xfs_imap_valid(inode, &imap,
- offset);
+ imap_valid = 0;
}
- if (imap_valid) {
- xfs_map_at_offset(inode, bh, &imap, offset);
- xfs_add_to_ioend(inode, bh, offset, type,
- &ioend, new_ioend);
- count++;
+ } else if (buffer_delay(bh)) {
+ if (type != IO_DELALLOC) {
+ type = IO_DELALLOC;
+ imap_valid = 0;
}
} else if (buffer_uptodate(bh)) {
- /*
- * we got here because the buffer is already mapped.
- * That means it must already have extents allocated
- * underneath it. Map the extent by reading it.
- */
- if (!imap_valid || flags != BMAPI_READ) {
- flags = BMAPI_READ;
- size = xfs_probe_cluster(inode, page, bh, head);
- err = xfs_map_blocks(inode, offset, size,
- &imap, flags);
- if (err)
- goto error;
- imap_valid = xfs_imap_valid(inode, &imap,
- offset);
+ if (type != IO_OVERWRITE) {
+ type = IO_OVERWRITE;
+ imap_valid = 0;
+ }
+ } else {
+ if (PageUptodate(page)) {
+ ASSERT(buffer_mapped(bh));
+ imap_valid = 0;
}
+ continue;
+ }
+ if (imap_valid)
+ imap_valid = xfs_imap_valid(inode, &imap, offset);
+ if (!imap_valid) {
/*
- * We set the type to IO_NEW in case we are doing a
- * small write at EOF that is extending the file but
- * without needing an allocation. We need to update the
- * file size on I/O completion in this case so it is
- * the same case as having just allocated a new extent
- * that we are writing into for the first time.
+ * If we didn't have a valid mapping then we need to
+ * put the new mapping into a separate ioend structure.
+ * This ensures non-contiguous extents always have
+ * separate ioends, which is particularly important
+ * for unwritten extent conversion at I/O completion
+ * time.
*/
- type = IO_NEW;
- if (trylock_buffer(bh)) {
- if (imap_valid)
- all_bh = 1;
- xfs_add_to_ioend(inode, bh, offset, type,
- &ioend, !imap_valid);
- count++;
- } else {
- imap_valid = 0;
- }
- } else if (PageUptodate(page)) {
- ASSERT(buffer_mapped(bh));
- imap_valid = 0;
+ new_ioend = 1;
+ err = xfs_map_blocks(inode, offset, &imap, type,
+ nonblocking);
+ if (err)
+ goto error;
+ imap_valid = xfs_imap_valid(inode, &imap, offset);
+ }
+ if (imap_valid) {
+ lock_buffer(bh);
+ if (type != IO_OVERWRITE)
+ xfs_map_at_offset(inode, bh, &imap, offset);
+ xfs_add_to_ioend(inode, bh, offset, type, &ioend,
+ new_ioend);
+ count++;
}
if (!iohead)
@@ -1232,7 +1069,7 @@ xfs_vm_writepage(
end_index = last_index;
xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
- wbc, all_bh, end_index);
+ wbc, end_index);
}
if (iohead)
@@ -1301,13 +1138,19 @@ __xfs_get_blocks(
int create,
int direct)
{
- int flags = create ? BMAPI_WRITE : BMAPI_READ;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_fileoff_t offset_fsb, end_fsb;
+ int error = 0;
+ int lockmode = 0;
struct xfs_bmbt_irec imap;
+ int nimaps = 1;
xfs_off_t offset;
ssize_t size;
- int nimap = 1;
int new = 0;
- int error;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -XFS_ERROR(EIO);
offset = (xfs_off_t)iblock << inode->i_blkbits;
ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
@@ -1316,15 +1159,45 @@ __xfs_get_blocks(
if (!create && direct && offset >= i_size_read(inode))
return 0;
- if (direct && create)
- flags |= BMAPI_DIRECT;
+ if (create) {
+ lockmode = XFS_ILOCK_EXCL;
+ xfs_ilock(ip, lockmode);
+ } else {
+ lockmode = xfs_ilock_map_shared(ip);
+ }
+
+ ASSERT(offset <= mp->m_maxioffset);
+ if (offset + size > mp->m_maxioffset)
+ size = mp->m_maxioffset - offset;
+ end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
+ offset_fsb = XFS_B_TO_FSBT(mp, offset);
- error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap,
- &new);
+ error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
+ XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL);
if (error)
- return -error;
- if (nimap == 0)
- return 0;
+ goto out_unlock;
+
+ if (create &&
+ (!nimaps ||
+ (imap.br_startblock == HOLESTARTBLOCK ||
+ imap.br_startblock == DELAYSTARTBLOCK))) {
+ if (direct) {
+ error = xfs_iomap_write_direct(ip, offset, size,
+ &imap, nimaps);
+ } else {
+ error = xfs_iomap_write_delay(ip, offset, size, &imap);
+ }
+ if (error)
+ goto out_unlock;
+
+ trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
+ } else if (nimaps) {
+ trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
+ } else {
+ trace_xfs_get_blocks_notfound(ip, offset, size);
+ goto out_unlock;
+ }
+ xfs_iunlock(ip, lockmode);
if (imap.br_startblock != HOLESTARTBLOCK &&
imap.br_startblock != DELAYSTARTBLOCK) {
@@ -1391,6 +1264,10 @@ __xfs_get_blocks(
}
return 0;
+
+out_unlock:
+ xfs_iunlock(ip, lockmode);
+ return -error;
}
int
@@ -1478,7 +1355,7 @@ xfs_vm_direct_IO(
ssize_t ret;
if (rw & WRITE) {
- iocb->private = xfs_alloc_ioend(inode, IO_NEW);
+ iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
offset, nr_segs,
@@ -1504,11 +1381,42 @@ xfs_vm_write_failed(
struct inode *inode = mapping->host;
if (to > inode->i_size) {
- struct iattr ia = {
- .ia_valid = ATTR_SIZE | ATTR_FORCE,
- .ia_size = inode->i_size,
- };
- xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK);
+ /*
+ * punch out the delalloc blocks we have already allocated. We
+ * don't call xfs_setattr() to do this as we may be in the
+ * middle of a multi-iovec write and so the vfs inode->i_size
+ * will not match the xfs ip->i_size and so it will zero too
+ * much. Hence we jus truncate the page cache to zero what is
+ * necessary and punch the delalloc blocks directly.
+ */
+ struct xfs_inode *ip = XFS_I(inode);
+ xfs_fileoff_t start_fsb;
+ xfs_fileoff_t end_fsb;
+ int error;
+
+ truncate_pagecache(inode, to, inode->i_size);
+
+ /*
+ * Check if there are any blocks that are outside of i_size
+ * that need to be trimmed back.
+ */
+ start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
+ end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
+ if (end_fsb <= start_fsb)
+ return;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+ end_fsb - start_fsb);
+ if (error) {
+ /* something screwed, just bail */
+ if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+ xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+ "xfs_vm_write_failed: unable to clean up ino %lld",
+ ip->i_ino);
+ }
+ }
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
}
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index c5057fb..71f721e 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -23,6 +23,22 @@ extern struct workqueue_struct *xfsconvertd_workqueue;
extern mempool_t *xfs_ioend_pool;
/*
+ * Types of I/O for bmap clustering and I/O completion tracking.
+ */
+enum {
+ IO_DIRECT = 0, /* special case for direct I/O ioends */
+ IO_DELALLOC, /* mapping covers delalloc region */
+ IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */
+ IO_OVERWRITE, /* mapping covers already allocated extent */
+};
+
+#define XFS_IO_TYPES \
+ { 0, "" }, \
+ { IO_DELALLOC, "delalloc" }, \
+ { IO_UNWRITTEN, "unwritten" }, \
+ { IO_OVERWRITE, "overwrite" }
+
+/*
* xfs_ioend struct manages large extent writes for XFS.
* It can manage several multi-page bio's at once.
*/
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 63fd2c0..92f1f2a 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -44,12 +44,7 @@
static kmem_zone_t *xfs_buf_zone;
STATIC int xfsbufd(void *);
-STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
-static struct shrinker xfs_buf_shake = {
- .shrink = xfsbufd_wakeup,
- .seeks = DEFAULT_SEEKS,
-};
static struct workqueue_struct *xfslogd_workqueue;
struct workqueue_struct *xfsdatad_workqueue;
@@ -168,8 +163,79 @@ test_page_region(
}
/*
- * Internal xfs_buf_t object manipulation
+ * xfs_buf_lru_add - add a buffer to the LRU.
+ *
+ * The LRU takes a new reference to the buffer so that it will only be freed
+ * once the shrinker takes the buffer off the LRU.
*/
+STATIC void
+xfs_buf_lru_add(
+ struct xfs_buf *bp)
+{
+ struct xfs_buftarg *btp = bp->b_target;
+
+ spin_lock(&btp->bt_lru_lock);
+ if (list_empty(&bp->b_lru)) {
+ atomic_inc(&bp->b_hold);
+ list_add_tail(&bp->b_lru, &btp->bt_lru);
+ btp->bt_lru_nr++;
+ }
+ spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * xfs_buf_lru_del - remove a buffer from the LRU
+ *
+ * The unlocked check is safe here because it only occurs when there are not
+ * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
+ * to optimise the shrinker removing the buffer from the LRU and calling
+ * xfs_buf_free(). i.e. it removes an unneccessary round trip on the
+ * bt_lru_lock.
+ */
+STATIC void
+xfs_buf_lru_del(
+ struct xfs_buf *bp)
+{
+ struct xfs_buftarg *btp = bp->b_target;
+
+ if (list_empty(&bp->b_lru))
+ return;
+
+ spin_lock(&btp->bt_lru_lock);
+ if (!list_empty(&bp->b_lru)) {
+ list_del_init(&bp->b_lru);
+ btp->bt_lru_nr--;
+ }
+ spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * When we mark a buffer stale, we remove the buffer from the LRU and clear the
+ * b_lru_ref count so that the buffer is freed immediately when the buffer
+ * reference count falls to zero. If the buffer is already on the LRU, we need
+ * to remove the reference that LRU holds on the buffer.
+ *
+ * This prevents build-up of stale buffers on the LRU.
+ */
+void
+xfs_buf_stale(
+ struct xfs_buf *bp)
+{
+ bp->b_flags |= XBF_STALE;
+ atomic_set(&(bp)->b_lru_ref, 0);
+ if (!list_empty(&bp->b_lru)) {
+ struct xfs_buftarg *btp = bp->b_target;
+
+ spin_lock(&btp->bt_lru_lock);
+ if (!list_empty(&bp->b_lru)) {
+ list_del_init(&bp->b_lru);
+ btp->bt_lru_nr--;
+ atomic_dec(&bp->b_hold);
+ }
+ spin_unlock(&btp->bt_lru_lock);
+ }
+ ASSERT(atomic_read(&bp->b_hold) >= 1);
+}
STATIC void
_xfs_buf_initialize(
@@ -186,7 +252,9 @@ _xfs_buf_initialize(
memset(bp, 0, sizeof(xfs_buf_t));
atomic_set(&bp->b_hold, 1);
+ atomic_set(&bp->b_lru_ref, 1);
init_completion(&bp->b_iowait);
+ INIT_LIST_HEAD(&bp->b_lru);
INIT_LIST_HEAD(&bp->b_list);
RB_CLEAR_NODE(&bp->b_rbnode);
sema_init(&bp->b_sema, 0); /* held, no waiters */
@@ -262,6 +330,8 @@ xfs_buf_free(
{
trace_xfs_buf_free(bp, _RET_IP_);
+ ASSERT(list_empty(&bp->b_lru));
+
if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
uint i;
@@ -337,7 +407,6 @@ _xfs_buf_lookup_pages(
__func__, gfp_mask);
XFS_STATS_INC(xb_page_retries);
- xfsbufd_wakeup(NULL, 0, gfp_mask);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}
@@ -488,29 +557,16 @@ found:
spin_unlock(&pag->pag_buf_lock);
xfs_perag_put(pag);
- /* Attempt to get the semaphore without sleeping,
- * if this does not work then we need to drop the
- * spinlock and do a hard attempt on the semaphore.
- */
- if (down_trylock(&bp->b_sema)) {
+ if (xfs_buf_cond_lock(bp)) {
+ /* failed, so wait for the lock if requested. */
if (!(flags & XBF_TRYLOCK)) {
- /* wait for buffer ownership */
xfs_buf_lock(bp);
XFS_STATS_INC(xb_get_locked_waited);
} else {
- /* We asked for a trylock and failed, no need
- * to look at file offset and length here, we
- * know that this buffer at least overlaps our
- * buffer and is locked, therefore our buffer
- * either does not exist, or is this buffer.
- */
xfs_buf_rele(bp);
XFS_STATS_INC(xb_busy_locked);
return NULL;
}
- } else {
- /* trylock worked */
- XB_SET_OWNER(bp);
}
if (bp->b_flags & XBF_STALE) {
@@ -841,6 +897,7 @@ xfs_buf_rele(
if (!pag) {
ASSERT(!bp->b_relse);
+ ASSERT(list_empty(&bp->b_lru));
ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
if (atomic_dec_and_test(&bp->b_hold))
xfs_buf_free(bp);
@@ -848,13 +905,19 @@ xfs_buf_rele(
}
ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
+
ASSERT(atomic_read(&bp->b_hold) > 0);
if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
if (bp->b_relse) {
atomic_inc(&bp->b_hold);
spin_unlock(&pag->pag_buf_lock);
bp->b_relse(bp);
+ } else if (!(bp->b_flags & XBF_STALE) &&
+ atomic_read(&bp->b_lru_ref)) {
+ xfs_buf_lru_add(bp);
+ spin_unlock(&pag->pag_buf_lock);
} else {
+ xfs_buf_lru_del(bp);
ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
spin_unlock(&pag->pag_buf_lock);
@@ -876,10 +939,18 @@ xfs_buf_rele(
*/
/*
- * Locks a buffer object, if it is not already locked.
- * Note that this in no way locks the underlying pages, so it is only
- * useful for synchronizing concurrent use of buffer objects, not for
- * synchronizing independent access to the underlying pages.
+ * Locks a buffer object, if it is not already locked. Note that this in
+ * no way locks the underlying pages, so it is only useful for
+ * synchronizing concurrent use of buffer objects, not for synchronizing
+ * independent access to the underlying pages.
+ *
+ * If we come across a stale, pinned, locked buffer, we know that we are
+ * being asked to lock a buffer that has been reallocated. Because it is
+ * pinned, we know that the log has not been pushed to disk and hence it
+ * will still be locked. Rather than continuing to have trylock attempts
+ * fail until someone else pushes the log, push it ourselves before
+ * returning. This means that the xfsaild will not get stuck trying
+ * to push on stale inode buffers.
*/
int
xfs_buf_cond_lock(
@@ -890,6 +961,8 @@ xfs_buf_cond_lock(
locked = down_trylock(&bp->b_sema) == 0;
if (locked)
XB_SET_OWNER(bp);
+ else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
+ xfs_log_force(bp->b_target->bt_mount, 0);
trace_xfs_buf_cond_lock(bp, _RET_IP_);
return locked ? 0 : -EBUSY;
@@ -1441,51 +1514,84 @@ xfs_buf_iomove(
*/
/*
- * Wait for any bufs with callbacks that have been submitted but
- * have not yet returned... walk the hash list for the target.
+ * Wait for any bufs with callbacks that have been submitted but have not yet
+ * returned. These buffers will have an elevated hold count, so wait on those
+ * while freeing all the buffers only held by the LRU.
*/
void
xfs_wait_buftarg(
struct xfs_buftarg *btp)
{
- struct xfs_perag *pag;
- uint i;
+ struct xfs_buf *bp;
- for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
- pag = xfs_perag_get(btp->bt_mount, i);
- spin_lock(&pag->pag_buf_lock);
- while (rb_first(&pag->pag_buf_tree)) {
- spin_unlock(&pag->pag_buf_lock);
+restart:
+ spin_lock(&btp->bt_lru_lock);
+ while (!list_empty(&btp->bt_lru)) {
+ bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+ if (atomic_read(&bp->b_hold) > 1) {
+ spin_unlock(&btp->bt_lru_lock);
delay(100);
- spin_lock(&pag->pag_buf_lock);
+ goto restart;
}
- spin_unlock(&pag->pag_buf_lock);
- xfs_perag_put(pag);
+ /*
+ * clear the LRU reference count so the bufer doesn't get
+ * ignored in xfs_buf_rele().
+ */
+ atomic_set(&bp->b_lru_ref, 0);
+ spin_unlock(&btp->bt_lru_lock);
+ xfs_buf_rele(bp);
+ spin_lock(&btp->bt_lru_lock);
}
+ spin_unlock(&btp->bt_lru_lock);
}
-/*
- * buftarg list for delwrite queue processing
- */
-static LIST_HEAD(xfs_buftarg_list);
-static DEFINE_SPINLOCK(xfs_buftarg_lock);
-
-STATIC void
-xfs_register_buftarg(
- xfs_buftarg_t *btp)
+int
+xfs_buftarg_shrink(
+ struct shrinker *shrink,
+ int nr_to_scan,
+ gfp_t mask)
{
- spin_lock(&xfs_buftarg_lock);
- list_add(&btp->bt_list, &xfs_buftarg_list);
- spin_unlock(&xfs_buftarg_lock);
-}
+ struct xfs_buftarg *btp = container_of(shrink,
+ struct xfs_buftarg, bt_shrinker);
+ struct xfs_buf *bp;
+ LIST_HEAD(dispose);
-STATIC void
-xfs_unregister_buftarg(
- xfs_buftarg_t *btp)
-{
- spin_lock(&xfs_buftarg_lock);
- list_del(&btp->bt_list);
- spin_unlock(&xfs_buftarg_lock);
+ if (!nr_to_scan)
+ return btp->bt_lru_nr;
+
+ spin_lock(&btp->bt_lru_lock);
+ while (!list_empty(&btp->bt_lru)) {
+ if (nr_to_scan-- <= 0)
+ break;
+
+ bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+
+ /*
+ * Decrement the b_lru_ref count unless the value is already
+ * zero. If the value is already zero, we need to reclaim the
+ * buffer, otherwise it gets another trip through the LRU.
+ */
+ if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
+ list_move_tail(&bp->b_lru, &btp->bt_lru);
+ continue;
+ }
+
+ /*
+ * remove the buffer from the LRU now to avoid needing another
+ * lock round trip inside xfs_buf_rele().
+ */
+ list_move(&bp->b_lru, &dispose);
+ btp->bt_lru_nr--;
+ }
+ spin_unlock(&btp->bt_lru_lock);
+
+ while (!list_empty(&dispose)) {
+ bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
+ list_del_init(&bp->b_lru);
+ xfs_buf_rele(bp);
+ }
+
+ return btp->bt_lru_nr;
}
void
@@ -1493,17 +1599,14 @@ xfs_free_buftarg(
struct xfs_mount *mp,
struct xfs_buftarg *btp)
{
+ unregister_shrinker(&btp->bt_shrinker);
+
xfs_flush_buftarg(btp, 1);
if (mp->m_flags & XFS_MOUNT_BARRIER)
xfs_blkdev_issue_flush(btp);
iput(btp->bt_mapping->host);
- /* Unregister the buftarg first so that we don't get a
- * wakeup finding a non-existent task
- */
- xfs_unregister_buftarg(btp);
kthread_stop(btp->bt_task);
-
kmem_free(btp);
}
@@ -1600,20 +1703,13 @@ xfs_alloc_delwrite_queue(
xfs_buftarg_t *btp,
const char *fsname)
{
- int error = 0;
-
- INIT_LIST_HEAD(&btp->bt_list);
INIT_LIST_HEAD(&btp->bt_delwrite_queue);
spin_lock_init(&btp->bt_delwrite_lock);
btp->bt_flags = 0;
btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
- if (IS_ERR(btp->bt_task)) {
- error = PTR_ERR(btp->bt_task);
- goto out_error;
- }
- xfs_register_buftarg(btp);
-out_error:
- return error;
+ if (IS_ERR(btp->bt_task))
+ return PTR_ERR(btp->bt_task);
+ return 0;
}
xfs_buftarg_t *
@@ -1630,12 +1726,17 @@ xfs_alloc_buftarg(
btp->bt_mount = mp;
btp->bt_dev = bdev->bd_dev;
btp->bt_bdev = bdev;
+ INIT_LIST_HEAD(&btp->bt_lru);
+ spin_lock_init(&btp->bt_lru_lock);
if (xfs_setsize_buftarg_early(btp, bdev))
goto error;
if (xfs_mapping_buftarg(btp, bdev))
goto error;
if (xfs_alloc_delwrite_queue(btp, fsname))
goto error;
+ btp->bt_shrinker.shrink = xfs_buftarg_shrink;
+ btp->bt_shrinker.seeks = DEFAULT_SEEKS;
+ register_shrinker(&btp->bt_shrinker);
return btp;
error:
@@ -1740,27 +1841,6 @@ xfs_buf_runall_queues(
flush_workqueue(queue);
}
-STATIC int
-xfsbufd_wakeup(
- struct shrinker *shrink,
- int priority,
- gfp_t mask)
-{
- xfs_buftarg_t *btp;
-
- spin_lock(&xfs_buftarg_lock);
- list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
- if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
- continue;
- if (list_empty(&btp->bt_delwrite_queue))
- continue;
- set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
- wake_up_process(btp->bt_task);
- }
- spin_unlock(&xfs_buftarg_lock);
- return 0;
-}
-
/*
* Move as many buffers as specified to the supplied list
* idicating if we skipped any buffers to prevent deadlocks.
@@ -1781,7 +1861,6 @@ xfs_buf_delwri_split(
INIT_LIST_HEAD(list);
spin_lock(dwlk);
list_for_each_entry_safe(bp, n, dwq, b_list) {
- trace_xfs_buf_delwri_split(bp, _RET_IP_);
ASSERT(bp->b_flags & XBF_DELWRI);
if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) {
@@ -1795,6 +1874,7 @@ xfs_buf_delwri_split(
_XBF_RUN_QUEUES);
bp->b_flags |= XBF_WRITE;
list_move_tail(&bp->b_list, list);
+ trace_xfs_buf_delwri_split(bp, _RET_IP_);
} else
skipped++;
}
@@ -1955,7 +2035,6 @@ xfs_buf_init(void)
if (!xfsconvertd_workqueue)
goto out_destroy_xfsdatad_workqueue;
- register_shrinker(&xfs_buf_shake);
return 0;
out_destroy_xfsdatad_workqueue:
@@ -1971,7 +2050,6 @@ xfs_buf_init(void)
void
xfs_buf_terminate(void)
{
- unregister_shrinker(&xfs_buf_shake);
destroy_workqueue(xfsconvertd_workqueue);
destroy_workqueue(xfsdatad_workqueue);
destroy_workqueue(xfslogd_workqueue);
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 383a3f3..a76c242 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -128,10 +128,15 @@ typedef struct xfs_buftarg {
/* per device delwri queue */
struct task_struct *bt_task;
- struct list_head bt_list;
struct list_head bt_delwrite_queue;
spinlock_t bt_delwrite_lock;
unsigned long bt_flags;
+
+ /* LRU control structures */
+ struct shrinker bt_shrinker;
+ struct list_head bt_lru;
+ spinlock_t bt_lru_lock;
+ unsigned int bt_lru_nr;
} xfs_buftarg_t;
/*
@@ -164,9 +169,11 @@ typedef struct xfs_buf {
xfs_off_t b_file_offset; /* offset in file */
size_t b_buffer_length;/* size of buffer in bytes */
atomic_t b_hold; /* reference count */
+ atomic_t b_lru_ref; /* lru reclaim ref count */
xfs_buf_flags_t b_flags; /* status flags */
struct semaphore b_sema; /* semaphore for lockables */
+ struct list_head b_lru; /* lru list */
wait_queue_head_t b_waiters; /* unpin waiters */
struct list_head b_list;
struct xfs_perag *b_pag; /* contains rbtree root */
@@ -264,7 +271,8 @@ extern void xfs_buf_terminate(void);
#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \
~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED))
-#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE)
+void xfs_buf_stale(struct xfs_buf *bp);
+#define XFS_BUF_STALE(bp) xfs_buf_stale(bp);
#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE)
#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE)
#define XFS_BUF_SUPER_STALE(bp) do { \
@@ -328,9 +336,15 @@ extern void xfs_buf_terminate(void);
#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length)
#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt))
-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0)
+static inline void
+xfs_buf_set_ref(
+ struct xfs_buf *bp,
+ int lru_ref)
+{
+ atomic_set(&bp->b_lru_ref, lru_ref);
+}
+#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref)
#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)
-#define XFS_BUF_SET_REF(bp, ref) do { } while (0)
#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count))
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 3764d74..fc0114d 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -70,8 +70,16 @@ xfs_fs_encode_fh(
else
fileid_type = FILEID_INO32_GEN_PARENT;
- /* filesystem may contain 64bit inode numbers */
- if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS))
+ /*
+ * If the the filesystem may contain 64bit inode numbers, we need
+ * to use larger file handles that can represent them.
+ *
+ * While we only allocate inodes that do not fit into 32 bits any
+ * large enough filesystem may contain them, thus the slightly
+ * confusing looking conditional below.
+ */
+ if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) ||
+ (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES))
fileid_type |= XFS_FILEID_TYPE_64FLAG;
/*
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 2ea238f6..ad442d9 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -416,7 +416,7 @@ xfs_attrlist_by_handle(
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
+ kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL);
if (!kbuf)
goto out_dput;
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 96107ef..94d5fd6 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -762,7 +762,8 @@ xfs_setup_inode(
inode->i_state = I_NEW;
inode_sb_list_add(inode);
- insert_inode_hash(inode);
+ /* make the inode look hashed for the writeback code */
+ hlist_add_fake(&inode->i_hash);
inode->i_mode = ip->i_d.di_mode;
inode->i_nlink = ip->i_d.di_nlink;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 214ddd7..0964949 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -37,7 +37,6 @@
#include <kmem.h>
#include <mrlock.h>
-#include <sv.h>
#include <time.h>
#include <support/debug.h>
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index cf80878..a10f641 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -353,9 +353,6 @@ xfs_parseargs(
mp->m_qflags &= ~XFS_OQUOTA_ENFD;
} else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
mp->m_flags |= XFS_MOUNT_DELAYLOG;
- cmn_err(CE_WARN,
- "Enabling EXPERIMENTAL delayed logging feature "
- "- use at your own risk.\n");
} else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
} else if (!strcmp(this_char, "ihashsize")) {
@@ -837,8 +834,11 @@ xfsaild_wakeup(
struct xfs_ail *ailp,
xfs_lsn_t threshold_lsn)
{
- ailp->xa_target = threshold_lsn;
- wake_up_process(ailp->xa_task);
+ /* only ever move the target forwards */
+ if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) {
+ ailp->xa_target = threshold_lsn;
+ wake_up_process(ailp->xa_task);
+ }
}
STATIC int
@@ -850,8 +850,17 @@ xfsaild(
long tout = 0; /* milliseconds */
while (!kthread_should_stop()) {
- schedule_timeout_interruptible(tout ?
- msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
+ /*
+ * for short sleeps indicating congestion, don't allow us to
+ * get woken early. Otherwise all we do is bang on the AIL lock
+ * without making progress.
+ */
+ if (tout && tout <= 20)
+ __set_current_state(TASK_KILLABLE);
+ else
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(tout ?
+ msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
/* swsusp */
try_to_freeze();
@@ -938,7 +947,7 @@ out_reclaim:
* Slab object creation initialisation for the XFS inode.
* This covers only the idempotent fields in the XFS inode;
* all other fields need to be initialised on allocation
- * from the slab. This avoids the need to repeatedly intialise
+ * from the slab. This avoids the need to repeatedly initialise
* fields in the xfs inode that left in the initialise state
* when freeing the inode.
*/
@@ -1121,6 +1130,8 @@ xfs_fs_evict_inode(
*/
ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
+ lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
+ &xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
xfs_inactive(ip);
}
@@ -1609,16 +1620,14 @@ xfs_fs_fill_super(
goto out_free_sb;
}
-STATIC int
-xfs_fs_get_sb(
+STATIC struct dentry *
+xfs_fs_mount(
struct file_system_type *fs_type,
int flags,
const char *dev_name,
- void *data,
- struct vfsmount *mnt)
+ void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
}
static const struct super_operations xfs_super_operations = {
@@ -1639,7 +1648,7 @@ static const struct super_operations xfs_super_operations = {
static struct file_system_type xfs_fs_type = {
.owner = THIS_MODULE,
.name = "xfs",
- .get_sb = xfs_fs_get_sb,
+ .mount = xfs_fs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 37d3325..a02480d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -53,14 +53,30 @@ xfs_inode_ag_walk_grab(
{
struct inode *inode = VFS_I(ip);
+ ASSERT(rcu_read_lock_held());
+
+ /*
+ * check for stale RCU freed inode
+ *
+ * If the inode has been reallocated, it doesn't matter if it's not in
+ * the AG we are walking - we are walking for writeback, so if it
+ * passes all the "valid inode" checks and is dirty, then we'll write
+ * it back anyway. If it has been reallocated and still being
+ * initialised, the XFS_INEW check below will catch it.
+ */
+ spin_lock(&ip->i_flags_lock);
+ if (!ip->i_ino)
+ goto out_unlock_noent;
+
+ /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
+ if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
+ goto out_unlock_noent;
+ spin_unlock(&ip->i_flags_lock);
+
/* nothing to sync during shutdown */
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return EFSCORRUPTED;
- /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
- if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
- return ENOENT;
-
/* If we can't grab the inode, it must on it's way to reclaim. */
if (!igrab(inode))
return ENOENT;
@@ -72,6 +88,10 @@ xfs_inode_ag_walk_grab(
/* inode is valid */
return 0;
+
+out_unlock_noent:
+ spin_unlock(&ip->i_flags_lock);
+ return ENOENT;
}
STATIC int
@@ -98,12 +118,12 @@ restart:
int error = 0;
int i;
- read_lock(&pag->pag_ici_lock);
+ rcu_read_lock();
nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
(void **)batch, first_index,
XFS_LOOKUP_BATCH);
if (!nr_found) {
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
break;
}
@@ -118,18 +138,26 @@ restart:
batch[i] = NULL;
/*
- * Update the index for the next lookup. Catch overflows
- * into the next AG range which can occur if we have inodes
- * in the last block of the AG and we are currently
- * pointing to the last inode.
+ * Update the index for the next lookup. Catch
+ * overflows into the next AG range which can occur if
+ * we have inodes in the last block of the AG and we
+ * are currently pointing to the last inode.
+ *
+ * Because we may see inodes that are from the wrong AG
+ * due to RCU freeing and reallocation, only update the
+ * index if it lies in this AG. It was a race that lead
+ * us to see this inode, so another lookup from the
+ * same index will not find it again.
*/
+ if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
+ continue;
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
done = 1;
}
/* unlock now we've grabbed the inodes. */
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
for (i = 0; i < nr_found; i++) {
if (!batch[i])
@@ -592,12 +620,12 @@ xfs_inode_set_reclaim_tag(
struct xfs_perag *pag;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
- write_lock(&pag->pag_ici_lock);
+ spin_lock(&pag->pag_ici_lock);
spin_lock(&ip->i_flags_lock);
__xfs_inode_set_reclaim_tag(pag, ip);
__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
spin_unlock(&ip->i_flags_lock);
- write_unlock(&pag->pag_ici_lock);
+ spin_unlock(&pag->pag_ici_lock);
xfs_perag_put(pag);
}
@@ -639,9 +667,14 @@ xfs_reclaim_inode_grab(
struct xfs_inode *ip,
int flags)
{
+ ASSERT(rcu_read_lock_held());
+
+ /* quick check for stale RCU freed inode */
+ if (!ip->i_ino)
+ return 1;
/*
- * do some unlocked checks first to avoid unnecceary lock traffic.
+ * do some unlocked checks first to avoid unnecessary lock traffic.
* The first is a flush lock check, the second is a already in reclaim
* check. Only do these checks if we are not going to block on locks.
*/
@@ -654,11 +687,16 @@ xfs_reclaim_inode_grab(
* The radix tree lock here protects a thread in xfs_iget from racing
* with us starting reclaim on the inode. Once we have the
* XFS_IRECLAIM flag set it will not touch us.
+ *
+ * Due to RCU lookup, we may find inodes that have been freed and only
+ * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that
+ * aren't candidates for reclaim at all, so we must check the
+ * XFS_IRECLAIMABLE is set first before proceeding to reclaim.
*/
spin_lock(&ip->i_flags_lock);
- ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
- if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
- /* ignore as it is already under reclaim */
+ if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
+ __xfs_iflags_test(ip, XFS_IRECLAIM)) {
+ /* not a reclaim candidate. */
spin_unlock(&ip->i_flags_lock);
return 1;
}
@@ -795,12 +833,12 @@ reclaim:
* added to the tree assert that it's been there before to catch
* problems with the inode life time early on.
*/
- write_lock(&pag->pag_ici_lock);
+ spin_lock(&pag->pag_ici_lock);
if (!radix_tree_delete(&pag->pag_ici_root,
XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
ASSERT(0);
__xfs_inode_clear_reclaim(pag, ip);
- write_unlock(&pag->pag_ici_lock);
+ spin_unlock(&pag->pag_ici_lock);
/*
* Here we do an (almost) spurious inode lock in order to coordinate
@@ -853,6 +891,7 @@ restart:
if (trylock) {
if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
skipped++;
+ xfs_perag_put(pag);
continue;
}
first_index = pag->pag_ici_reclaim_cursor;
@@ -863,14 +902,14 @@ restart:
struct xfs_inode *batch[XFS_LOOKUP_BATCH];
int i;
- write_lock(&pag->pag_ici_lock);
+ rcu_read_lock();
nr_found = radix_tree_gang_lookup_tag(
&pag->pag_ici_root,
(void **)batch, first_index,
XFS_LOOKUP_BATCH,
XFS_ICI_RECLAIM_TAG);
if (!nr_found) {
- write_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
break;
}
@@ -890,14 +929,24 @@ restart:
* occur if we have inodes in the last block of
* the AG and we are currently pointing to the
* last inode.
+ *
+ * Because we may see inodes that are from the
+ * wrong AG due to RCU freeing and
+ * reallocation, only update the index if it
+ * lies in this AG. It was a race that lead us
+ * to see this inode, so another lookup from
+ * the same index will not find it again.
*/
+ if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
+ pag->pag_agno)
+ continue;
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
done = 1;
}
/* unlock now we've grabbed the inodes. */
- write_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
for (i = 0; i < nr_found; i++) {
if (!batch[i])
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index acef2e9..647af2a 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -766,8 +766,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__field(int, curr_res)
__field(int, unit_res)
__field(unsigned int, flags)
- __field(void *, reserve_headq)
- __field(void *, write_headq)
+ __field(int, reserveq)
+ __field(int, writeq)
__field(int, grant_reserve_cycle)
__field(int, grant_reserve_bytes)
__field(int, grant_write_cycle)
@@ -784,19 +784,21 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__entry->curr_res = tic->t_curr_res;
__entry->unit_res = tic->t_unit_res;
__entry->flags = tic->t_flags;
- __entry->reserve_headq = log->l_reserve_headq;
- __entry->write_headq = log->l_write_headq;
- __entry->grant_reserve_cycle = log->l_grant_reserve_cycle;
- __entry->grant_reserve_bytes = log->l_grant_reserve_bytes;
- __entry->grant_write_cycle = log->l_grant_write_cycle;
- __entry->grant_write_bytes = log->l_grant_write_bytes;
+ __entry->reserveq = list_empty(&log->l_reserveq);
+ __entry->writeq = list_empty(&log->l_writeq);
+ xlog_crack_grant_head(&log->l_grant_reserve_head,
+ &__entry->grant_reserve_cycle,
+ &__entry->grant_reserve_bytes);
+ xlog_crack_grant_head(&log->l_grant_write_head,
+ &__entry->grant_write_cycle,
+ &__entry->grant_write_bytes);
__entry->curr_cycle = log->l_curr_cycle;
__entry->curr_block = log->l_curr_block;
- __entry->tail_lsn = log->l_tail_lsn;
+ __entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
),
TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
- "t_unit_res %u t_flags %s reserve_headq 0x%p "
- "write_headq 0x%p grant_reserve_cycle %d "
+ "t_unit_res %u t_flags %s reserveq %s "
+ "writeq %s grant_reserve_cycle %d "
"grant_reserve_bytes %d grant_write_cycle %d "
"grant_write_bytes %d curr_cycle %d curr_block %d "
"tail_cycle %d tail_block %d",
@@ -807,8 +809,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__entry->curr_res,
__entry->unit_res,
__print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
- __entry->reserve_headq,
- __entry->write_headq,
+ __entry->reserveq ? "empty" : "active",
+ __entry->writeq ? "empty" : "active",
__entry->grant_reserve_cycle,
__entry->grant_reserve_bytes,
__entry->grant_write_cycle,
@@ -835,6 +837,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
@@ -842,6 +845,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
@@ -935,10 +939,10 @@ DEFINE_PAGE_EVENT(xfs_writepage);
DEFINE_PAGE_EVENT(xfs_releasepage);
DEFINE_PAGE_EVENT(xfs_invalidatepage);
-DECLARE_EVENT_CLASS(xfs_iomap_class,
+DECLARE_EVENT_CLASS(xfs_imap_class,
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
- int flags, struct xfs_bmbt_irec *irec),
- TP_ARGS(ip, offset, count, flags, irec),
+ int type, struct xfs_bmbt_irec *irec),
+ TP_ARGS(ip, offset, count, type, irec),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
@@ -946,7 +950,7 @@ DECLARE_EVENT_CLASS(xfs_iomap_class,
__field(loff_t, new_size)
__field(loff_t, offset)
__field(size_t, count)
- __field(int, flags)
+ __field(int, type)
__field(xfs_fileoff_t, startoff)
__field(xfs_fsblock_t, startblock)
__field(xfs_filblks_t, blockcount)
@@ -958,13 +962,13 @@ DECLARE_EVENT_CLASS(xfs_iomap_class,
__entry->new_size = ip->i_new_size;
__entry->offset = offset;
__entry->count = count;
- __entry->flags = flags;
+ __entry->type = type;
__entry->startoff = irec ? irec->br_startoff : 0;
__entry->startblock = irec ? irec->br_startblock : 0;
__entry->blockcount = irec ? irec->br_blockcount : 0;
),
TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
- "offset 0x%llx count %zd flags %s "
+ "offset 0x%llx count %zd type %s "
"startoff 0x%llx startblock %lld blockcount 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
@@ -972,20 +976,21 @@ DECLARE_EVENT_CLASS(xfs_iomap_class,
__entry->new_size,
__entry->offset,
__entry->count,
- __print_flags(__entry->flags, "|", BMAPI_FLAGS),
+ __print_symbolic(__entry->type, XFS_IO_TYPES),
__entry->startoff,
(__int64_t)__entry->startblock,
__entry->blockcount)
)
#define DEFINE_IOMAP_EVENT(name) \
-DEFINE_EVENT(xfs_iomap_class, name, \
+DEFINE_EVENT(xfs_imap_class, name, \
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
- int flags, struct xfs_bmbt_irec *irec), \
- TP_ARGS(ip, offset, count, flags, irec))
-DEFINE_IOMAP_EVENT(xfs_iomap_enter);
-DEFINE_IOMAP_EVENT(xfs_iomap_found);
-DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
+ int type, struct xfs_bmbt_irec *irec), \
+ TP_ARGS(ip, offset, count, type, irec))
+DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
+DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
+DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
+DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
DECLARE_EVENT_CLASS(xfs_simple_io_class,
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
@@ -1022,6 +1027,7 @@ DEFINE_EVENT(xfs_simple_io_class, name, \
TP_ARGS(ip, offset, count))
DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
+DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
TRACE_EVENT(xfs_itruncate_start,
@@ -1420,6 +1426,7 @@ DEFINE_EVENT(xfs_alloc_class, name, \
TP_PROTO(struct xfs_alloc_arg *args), \
TP_ARGS(args))
DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
+DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound);
DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index faf8e1a..d22aa31 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -149,7 +149,6 @@ xfs_qm_dqdestroy(
ASSERT(list_empty(&dqp->q_freelist));
mutex_destroy(&dqp->q_qlock);
- sv_destroy(&dqp->q_pinwait);
kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
atomic_dec(&xfs_Gqm->qm_totaldquots);
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 0135e2a..11dd720 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -42,7 +42,7 @@ struct xfs_acl {
#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
#ifdef CONFIG_XFS_POSIX_ACL
-extern int xfs_check_acl(struct inode *inode, int mask);
+extern int xfs_check_acl(struct inode *inode, int mask, unsigned int flags);
extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
extern int xfs_acl_chmod(struct inode *inode);
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 63c7a1a..58632cc 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -227,7 +227,7 @@ typedef struct xfs_perag {
atomic_t pagf_fstrms; /* # of filestreams active in this AG */
- rwlock_t pag_ici_lock; /* incore inode lock */
+ spinlock_t pag_ici_lock; /* incore inode cache lock */
struct radix_tree_root pag_ici_root; /* incore inode cache root */
int pag_ici_reclaimable; /* reclaimable inodes */
struct mutex pag_ici_reclaim_lock; /* serialisation point */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 112abc4..fa8723f 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -577,61 +577,58 @@ xfs_alloc_ag_vextent_exact(
xfs_extlen_t rlen; /* length of returned extent */
ASSERT(args->alignment == 1);
+
/*
* Allocate/initialize a cursor for the by-number freespace btree.
*/
bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_BNO);
+ args->agno, XFS_BTNUM_BNO);
+
/*
* Lookup bno and minlen in the btree (minlen is irrelevant, really).
* Look for the closest free block <= bno, it must contain bno
* if any free block does.
*/
- if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i)))
+ error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
+ if (error)
goto error0;
- if (!i) {
- /*
- * Didn't find it, return null.
- */
- xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
- args->agbno = NULLAGBLOCK;
- return 0;
- }
+ if (!i)
+ goto not_found;
+
/*
* Grab the freespace record.
*/
- if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i)))
+ error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
+ if (error)
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
ASSERT(fbno <= args->agbno);
minend = args->agbno + args->minlen;
maxend = args->agbno + args->maxlen;
fend = fbno + flen;
+
/*
* Give up if the freespace isn't long enough for the minimum request.
*/
- if (fend < minend) {
- xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
- args->agbno = NULLAGBLOCK;
- return 0;
- }
+ if (fend < minend)
+ goto not_found;
+
/*
* End of extent will be smaller of the freespace end and the
* maximal requested end.
- */
- end = XFS_AGBLOCK_MIN(fend, maxend);
- /*
+ *
* Fix the length according to mod and prod if given.
*/
+ end = XFS_AGBLOCK_MIN(fend, maxend);
args->len = end - args->agbno;
xfs_alloc_fix_len(args);
- if (!xfs_alloc_fix_minleft(args)) {
- xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
- return 0;
- }
+ if (!xfs_alloc_fix_minleft(args))
+ goto not_found;
+
rlen = args->len;
ASSERT(args->agbno + rlen <= fend);
end = args->agbno + rlen;
+
/*
* We are allocating agbno for rlen [agbno .. end]
* Allocate/initialize a cursor for the by-size btree.
@@ -640,16 +637,25 @@ xfs_alloc_ag_vextent_exact(
args->agno, XFS_BTNUM_CNT);
ASSERT(args->agbno + args->len <=
be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
- if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
- args->agbno, args->len, XFSA_FIXUP_BNO_OK))) {
+ error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
+ args->len, XFSA_FIXUP_BNO_OK);
+ if (error) {
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
goto error0;
}
+
xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
- trace_xfs_alloc_exact_done(args);
args->wasfromfl = 0;
+ trace_xfs_alloc_exact_done(args);
+ return 0;
+
+not_found:
+ /* Didn't find it, return null. */
+ xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+ args->agbno = NULLAGBLOCK;
+ trace_xfs_alloc_exact_notfound(args);
return 0;
error0:
@@ -659,6 +665,95 @@ error0:
}
/*
+ * Search the btree in a given direction via the search cursor and compare
+ * the records found against the good extent we've already found.
+ */
+STATIC int
+xfs_alloc_find_best_extent(
+ struct xfs_alloc_arg *args, /* allocation argument structure */
+ struct xfs_btree_cur **gcur, /* good cursor */
+ struct xfs_btree_cur **scur, /* searching cursor */
+ xfs_agblock_t gdiff, /* difference for search comparison */
+ xfs_agblock_t *sbno, /* extent found by search */
+ xfs_extlen_t *slen,
+ xfs_extlen_t *slena, /* aligned length */
+ int dir) /* 0 = search right, 1 = search left */
+{
+ xfs_agblock_t bno;
+ xfs_agblock_t new;
+ xfs_agblock_t sdiff;
+ int error;
+ int i;
+
+ /* The good extent is perfect, no need to search. */
+ if (!gdiff)
+ goto out_use_good;
+
+ /*
+ * Look until we find a better one, run out of space or run off the end.
+ */
+ do {
+ error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
+ if (error)
+ goto error0;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+ xfs_alloc_compute_aligned(*sbno, *slen, args->alignment,
+ args->minlen, &bno, slena);
+
+ /*
+ * The good extent is closer than this one.
+ */
+ if (!dir) {
+ if (bno >= args->agbno + gdiff)
+ goto out_use_good;
+ } else {
+ if (bno <= args->agbno - gdiff)
+ goto out_use_good;
+ }
+
+ /*
+ * Same distance, compare length and pick the best.
+ */
+ if (*slena >= args->minlen) {
+ args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
+ xfs_alloc_fix_len(args);
+
+ sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
+ args->alignment, *sbno,
+ *slen, &new);
+
+ /*
+ * Choose closer size and invalidate other cursor.
+ */
+ if (sdiff < gdiff)
+ goto out_use_search;
+ goto out_use_good;
+ }
+
+ if (!dir)
+ error = xfs_btree_increment(*scur, 0, &i);
+ else
+ error = xfs_btree_decrement(*scur, 0, &i);
+ if (error)
+ goto error0;
+ } while (i);
+
+out_use_good:
+ xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
+ *scur = NULL;
+ return 0;
+
+out_use_search:
+ xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
+ *gcur = NULL;
+ return 0;
+
+error0:
+ /* caller invalidates cursors */
+ return error;
+}
+
+/*
* Allocate a variable extent near bno in the allocation group agno.
* Extent's length (returned in len) will be between minlen and maxlen,
* and of the form k * prod + mod unless there's nothing that large.
@@ -925,203 +1020,45 @@ xfs_alloc_ag_vextent_near(
}
}
} while (bno_cur_lt || bno_cur_gt);
+
/*
* Got both cursors still active, need to find better entry.
*/
if (bno_cur_lt && bno_cur_gt) {
- /*
- * Left side is long enough, look for a right side entry.
- */
if (ltlena >= args->minlen) {
/*
- * Fix up the length.
+ * Left side is good, look for a right side entry.
*/
args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
xfs_alloc_fix_len(args);
- rlen = args->len;
- ltdiff = xfs_alloc_compute_diff(args->agbno, rlen,
+ ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
args->alignment, ltbno, ltlen, &ltnew);
+
+ error = xfs_alloc_find_best_extent(args,
+ &bno_cur_lt, &bno_cur_gt,
+ ltdiff, &gtbno, &gtlen, &gtlena,
+ 0 /* search right */);
+ } else {
+ ASSERT(gtlena >= args->minlen);
+
/*
- * Not perfect.
- */
- if (ltdiff) {
- /*
- * Look until we find a better one, run out of
- * space, or run off the end.
- */
- while (bno_cur_lt && bno_cur_gt) {
- if ((error = xfs_alloc_get_rec(
- bno_cur_gt, &gtbno,
- &gtlen, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- xfs_alloc_compute_aligned(gtbno, gtlen,
- args->alignment, args->minlen,
- &gtbnoa, &gtlena);
- /*
- * The left one is clearly better.
- */
- if (gtbnoa >= args->agbno + ltdiff) {
- xfs_btree_del_cursor(
- bno_cur_gt,
- XFS_BTREE_NOERROR);
- bno_cur_gt = NULL;
- break;
- }
- /*
- * If we reach a big enough entry,
- * compare the two and pick the best.
- */
- if (gtlena >= args->minlen) {
- args->len =
- XFS_EXTLEN_MIN(gtlena,
- args->maxlen);
- xfs_alloc_fix_len(args);
- rlen = args->len;
- gtdiff = xfs_alloc_compute_diff(
- args->agbno, rlen,
- args->alignment,
- gtbno, gtlen, &gtnew);
- /*
- * Right side is better.
- */
- if (gtdiff < ltdiff) {
- xfs_btree_del_cursor(
- bno_cur_lt,
- XFS_BTREE_NOERROR);
- bno_cur_lt = NULL;
- }
- /*
- * Left side is better.
- */
- else {
- xfs_btree_del_cursor(
- bno_cur_gt,
- XFS_BTREE_NOERROR);
- bno_cur_gt = NULL;
- }
- break;
- }
- /*
- * Fell off the right end.
- */
- if ((error = xfs_btree_increment(
- bno_cur_gt, 0, &i)))
- goto error0;
- if (!i) {
- xfs_btree_del_cursor(
- bno_cur_gt,
- XFS_BTREE_NOERROR);
- bno_cur_gt = NULL;
- break;
- }
- }
- }
- /*
- * The left side is perfect, trash the right side.
- */
- else {
- xfs_btree_del_cursor(bno_cur_gt,
- XFS_BTREE_NOERROR);
- bno_cur_gt = NULL;
- }
- }
- /*
- * It's the right side that was found first, look left.
- */
- else {
- /*
- * Fix up the length.
+ * Right side is good, look for a left side entry.
*/
args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
xfs_alloc_fix_len(args);
- rlen = args->len;
- gtdiff = xfs_alloc_compute_diff(args->agbno, rlen,
+ gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
args->alignment, gtbno, gtlen, &gtnew);
- /*
- * Right side entry isn't perfect.
- */
- if (gtdiff) {
- /*
- * Look until we find a better one, run out of
- * space, or run off the end.
- */
- while (bno_cur_lt && bno_cur_gt) {
- if ((error = xfs_alloc_get_rec(
- bno_cur_lt, &ltbno,
- &ltlen, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- xfs_alloc_compute_aligned(ltbno, ltlen,
- args->alignment, args->minlen,
- &ltbnoa, &ltlena);
- /*
- * The right one is clearly better.
- */
- if (ltbnoa <= args->agbno - gtdiff) {
- xfs_btree_del_cursor(
- bno_cur_lt,
- XFS_BTREE_NOERROR);
- bno_cur_lt = NULL;
- break;
- }
- /*
- * If we reach a big enough entry,
- * compare the two and pick the best.
- */
- if (ltlena >= args->minlen) {
- args->len = XFS_EXTLEN_MIN(
- ltlena, args->maxlen);
- xfs_alloc_fix_len(args);
- rlen = args->len;
- ltdiff = xfs_alloc_compute_diff(
- args->agbno, rlen,
- args->alignment,
- ltbno, ltlen, &ltnew);
- /*
- * Left side is better.
- */
- if (ltdiff < gtdiff) {
- xfs_btree_del_cursor(
- bno_cur_gt,
- XFS_BTREE_NOERROR);
- bno_cur_gt = NULL;
- }
- /*
- * Right side is better.
- */
- else {
- xfs_btree_del_cursor(
- bno_cur_lt,
- XFS_BTREE_NOERROR);
- bno_cur_lt = NULL;
- }
- break;
- }
- /*
- * Fell off the left end.
- */
- if ((error = xfs_btree_decrement(
- bno_cur_lt, 0, &i)))
- goto error0;
- if (!i) {
- xfs_btree_del_cursor(bno_cur_lt,
- XFS_BTREE_NOERROR);
- bno_cur_lt = NULL;
- break;
- }
- }
- }
- /*
- * The right side is perfect, trash the left side.
- */
- else {
- xfs_btree_del_cursor(bno_cur_lt,
- XFS_BTREE_NOERROR);
- bno_cur_lt = NULL;
- }
+
+ error = xfs_alloc_find_best_extent(args,
+ &bno_cur_gt, &bno_cur_lt,
+ gtdiff, &ltbno, &ltlen, &ltlena,
+ 1 /* search left */);
}
+
+ if (error)
+ goto error0;
}
+
/*
* If we couldn't get anything, give up.
*/
@@ -1130,6 +1067,7 @@ xfs_alloc_ag_vextent_near(
args->agbno = NULLAGBLOCK;
return 0;
}
+
/*
* At this point we have selected a freespace entry, either to the
* left or to the right. If it's on the right, copy all the
@@ -1146,6 +1084,7 @@ xfs_alloc_ag_vextent_near(
j = 1;
} else
j = 0;
+
/*
* Fix up the length and compute the useful address.
*/
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index a6cff8e..71e90dc2 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -637,7 +637,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
* It didn't all fit, so we have to sort everything on hashval.
*/
sbsize = sf->hdr.count * sizeof(*sbuf);
- sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP);
+ sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP | KM_NOFS);
/*
* Scan the attribute list for the rest of the entries, storing
@@ -2386,7 +2386,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
args.dp = context->dp;
args.whichfork = XFS_ATTR_FORK;
args.valuelen = valuelen;
- args.value = kmem_alloc(valuelen, KM_SLEEP);
+ args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen);
retval = xfs_attr_rmtval_get(&args);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 8abd12e..4111cd3 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5471,8 +5471,13 @@ xfs_getbmap(
if (error)
goto out_unlock_iolock;
}
-
- ASSERT(ip->i_delayed_blks == 0);
+ /*
+ * even after flushing the inode, there can still be delalloc
+ * blocks on the inode beyond EOF due to speculative
+ * preallocation. These are not removed until the release
+ * function is called or the inode is inactivated. Hence we
+ * cannot assert here that ip->i_delayed_blks == 0.
+ */
}
lock = xfs_ilock_map_shared(ip);
@@ -6070,3 +6075,79 @@ xfs_bmap_disk_count_leaves(
*count += xfs_bmbt_disk_get_blockcount(frp);
}
}
+
+/*
+ * dead simple method of punching delalyed allocation blocks from a range in
+ * the inode. Walks a block at a time so will be slow, but is only executed in
+ * rare error cases so the overhead is not critical. This will alays punch out
+ * both the start and end blocks, even if the ranges only partially overlap
+ * them, so it is up to the caller to ensure that partial blocks are not
+ * passed in.
+ */
+int
+xfs_bmap_punch_delalloc_range(
+ struct xfs_inode *ip,
+ xfs_fileoff_t start_fsb,
+ xfs_fileoff_t length)
+{
+ xfs_fileoff_t remaining = length;
+ int error = 0;
+
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+ do {
+ int done;
+ xfs_bmbt_irec_t imap;
+ int nimaps = 1;
+ xfs_fsblock_t firstblock;
+ xfs_bmap_free_t flist;
+
+ /*
+ * Map the range first and check that it is a delalloc extent
+ * before trying to unmap the range. Otherwise we will be
+ * trying to remove a real extent (which requires a
+ * transaction) or a hole, which is probably a bad idea...
+ */
+ error = xfs_bmapi(NULL, ip, start_fsb, 1,
+ XFS_BMAPI_ENTIRE, NULL, 0, &imap,
+ &nimaps, NULL);
+
+ if (error) {
+ /* something screwed, just bail */
+ if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+ xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+ "Failed delalloc mapping lookup ino %lld fsb %lld.",
+ ip->i_ino, start_fsb);
+ }
+ break;
+ }
+ if (!nimaps) {
+ /* nothing there */
+ goto next_block;
+ }
+ if (imap.br_startblock != DELAYSTARTBLOCK) {
+ /* been converted, ignore */
+ goto next_block;
+ }
+ WARN_ON(imap.br_blockcount == 0);
+
+ /*
+ * Note: while we initialise the firstblock/flist pair, they
+ * should never be used because blocks should never be
+ * allocated or freed for a delalloc extent and hence we need
+ * don't cancel or finish them after the xfs_bunmapi() call.
+ */
+ xfs_bmap_init(&flist, &firstblock);
+ error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
+ &flist, &done);
+ if (error)
+ break;
+
+ ASSERT(!flist.xbf_count && !flist.xbf_first);
+next_block:
+ start_fsb++;
+ remaining--;
+ } while(remaining > 0);
+
+ return error;
+}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 71ec9b6..3651191 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -394,6 +394,11 @@ xfs_bmap_count_blocks(
int whichfork,
int *count);
+int
+xfs_bmap_punch_delalloc_range(
+ struct xfs_inode *ip,
+ xfs_fileoff_t start_fsb,
+ xfs_fileoff_t length);
#endif /* __KERNEL__ */
#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 04f9cca..2f9e97c 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -634,9 +634,8 @@ xfs_btree_read_bufl(
return error;
}
ASSERT(!bp || !XFS_BUF_GETERROR(bp));
- if (bp != NULL) {
+ if (bp)
XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
- }
*bpp = bp;
return 0;
}
@@ -944,13 +943,13 @@ xfs_btree_set_refs(
switch (cur->bc_btnum) {
case XFS_BTNUM_BNO:
case XFS_BTNUM_CNT:
- XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
+ XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
break;
case XFS_BTNUM_INO:
- XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF);
+ XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, XFS_INO_BTREE_REF);
break;
case XFS_BTNUM_BMAP:
- XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF);
+ XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_BMAP_BTREE_REF);
break;
default:
ASSERT(0);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 2686d0d..ed2b65f 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -142,7 +142,7 @@ xfs_buf_item_log_check(
#endif
STATIC void xfs_buf_error_relse(xfs_buf_t *bp);
-STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip);
+STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
/*
* This returns the number of log iovecs needed to log the
@@ -450,7 +450,7 @@ xfs_buf_item_unpin(
* xfs_trans_ail_delete() drops the AIL lock.
*/
if (bip->bli_flags & XFS_BLI_STALE_INODE) {
- xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip);
+ xfs_buf_do_callbacks(bp);
XFS_BUF_SET_FSPRIVATE(bp, NULL);
XFS_BUF_CLR_IODONE_FUNC(bp);
} else {
@@ -918,15 +918,26 @@ xfs_buf_attach_iodone(
XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
}
+/*
+ * We can have many callbacks on a buffer. Running the callbacks individually
+ * can cause a lot of contention on the AIL lock, so we allow for a single
+ * callback to be able to scan the remaining lip->li_bio_list for other items
+ * of the same type and callback to be processed in the first call.
+ *
+ * As a result, the loop walking the callback list below will also modify the
+ * list. it removes the first item from the list and then runs the callback.
+ * The loop then restarts from the new head of the list. This allows the
+ * callback to scan and modify the list attached to the buffer and we don't
+ * have to care about maintaining a next item pointer.
+ */
STATIC void
xfs_buf_do_callbacks(
- xfs_buf_t *bp,
- xfs_log_item_t *lip)
+ struct xfs_buf *bp)
{
- xfs_log_item_t *nlip;
+ struct xfs_log_item *lip;
- while (lip != NULL) {
- nlip = lip->li_bio_list;
+ while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) {
+ XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list);
ASSERT(lip->li_cb != NULL);
/*
* Clear the next pointer so we don't have any
@@ -936,7 +947,6 @@ xfs_buf_do_callbacks(
*/
lip->li_bio_list = NULL;
lip->li_cb(bp, lip);
- lip = nlip;
}
}
@@ -970,7 +980,7 @@ xfs_buf_iodone_callbacks(
ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp);
XFS_BUF_SUPER_STALE(bp);
trace_xfs_buf_item_iodone(bp, _RET_IP_);
- xfs_buf_do_callbacks(bp, lip);
+ xfs_buf_do_callbacks(bp);
XFS_BUF_SET_FSPRIVATE(bp, NULL);
XFS_BUF_CLR_IODONE_FUNC(bp);
xfs_buf_ioend(bp, 0);
@@ -1029,7 +1039,7 @@ xfs_buf_iodone_callbacks(
return;
}
- xfs_buf_do_callbacks(bp, lip);
+ xfs_buf_do_callbacks(bp);
XFS_BUF_SET_FSPRIVATE(bp, NULL);
XFS_BUF_CLR_IODONE_FUNC(bp);
xfs_buf_ioend(bp, 0);
@@ -1063,7 +1073,7 @@ xfs_buf_error_relse(
* We have to unpin the pinned buffers so do the
* callbacks.
*/
- xfs_buf_do_callbacks(bp, lip);
+ xfs_buf_do_callbacks(bp);
XFS_BUF_SET_FSPRIVATE(bp, NULL);
XFS_BUF_CLR_IODONE_FUNC(bp);
XFS_BUF_SET_BRELSE_FUNC(bp,NULL);
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 0e2ed43..b6ecd20 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -105,17 +105,6 @@ typedef struct xfs_buf_log_item {
xfs_buf_log_format_t bli_format; /* in-log header */
} xfs_buf_log_item_t;
-/*
- * This structure is used during recovery to record the buf log
- * items which have been canceled and should not be replayed.
- */
-typedef struct xfs_buf_cancel {
- xfs_daddr_t bc_blkno;
- uint bc_len;
- int bc_refcount;
- struct xfs_buf_cancel *bc_next;
-} xfs_buf_cancel_t;
-
void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
void xfs_buf_item_relse(struct xfs_buf *);
void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 3b9582c..e60490b 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -377,6 +377,19 @@ xfs_swap_extents(
ip->i_d.di_format = tip->i_d.di_format;
tip->i_d.di_format = tmp;
+ /*
+ * The extents in the source inode could still contain speculative
+ * preallocation beyond EOF (e.g. the file is open but not modified
+ * while defrag is in progress). In that case, we need to copy over the
+ * number of delalloc blocks the data fork in the source inode is
+ * tracking beyond EOF so that when the fork is truncated away when the
+ * temporary inode is unlinked we don't underrun the i_delayed_blks
+ * counter on that inode.
+ */
+ ASSERT(tip->i_delayed_blks == 0);
+ tip->i_delayed_blks = ip->i_delayed_blks;
+ ip->i_delayed_blks = 0;
+
ilf_fields = XFS_ILOG_CORE;
switch(ip->i_d.di_format) {
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index ed99902..c78cc6a 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -58,6 +58,7 @@ xfs_error_trap(int e)
int xfs_etest[XFS_NUM_INJECT_ERROR];
int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
+int xfs_error_test_active;
int
xfs_error_test(int error_tag, int *fsidp, char *expression,
@@ -108,6 +109,7 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp)
len = strlen(mp->m_fsname);
xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP);
strcpy(xfs_etest_fsname[i], mp->m_fsname);
+ xfs_error_test_active++;
return 0;
}
}
@@ -137,6 +139,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
xfs_etest_fsid[i] = 0LL;
kmem_free(xfs_etest_fsname[i]);
xfs_etest_fsname[i] = NULL;
+ xfs_error_test_active--;
}
}
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index c2c1a07..f338847 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -127,13 +127,14 @@ extern void xfs_corruption_error(const char *tag, int level,
#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT
#ifdef DEBUG
+extern int xfs_error_test_active;
extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
#define XFS_NUM_INJECT_ERROR 10
#define XFS_TEST_ERROR(expr, mp, tag, rf) \
- ((expr) || \
+ ((expr) || (xfs_error_test_active && \
xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \
- (rf)))
+ (rf))))
extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp);
extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud);
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index a55e687..75f2ef6 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -48,6 +48,28 @@ xfs_efi_item_free(
}
/*
+ * Freeing the efi requires that we remove it from the AIL if it has already
+ * been placed there. However, the EFI may not yet have been placed in the AIL
+ * when called by xfs_efi_release() from EFD processing due to the ordering of
+ * committed vs unpin operations in bulk insert operations. Hence the
+ * test_and_clear_bit(XFS_EFI_COMMITTED) to ensure only the last caller frees
+ * the EFI.
+ */
+STATIC void
+__xfs_efi_release(
+ struct xfs_efi_log_item *efip)
+{
+ struct xfs_ail *ailp = efip->efi_item.li_ailp;
+
+ if (!test_and_clear_bit(XFS_EFI_COMMITTED, &efip->efi_flags)) {
+ spin_lock(&ailp->xa_lock);
+ /* xfs_trans_ail_delete() drops the AIL lock. */
+ xfs_trans_ail_delete(ailp, &efip->efi_item);
+ xfs_efi_item_free(efip);
+ }
+}
+
+/*
* This returns the number of iovecs needed to log the given efi item.
* We only need 1 iovec for an efi item. It just logs the efi_log_format
* structure.
@@ -74,7 +96,8 @@ xfs_efi_item_format(
struct xfs_efi_log_item *efip = EFI_ITEM(lip);
uint size;
- ASSERT(efip->efi_next_extent == efip->efi_format.efi_nextents);
+ ASSERT(atomic_read(&efip->efi_next_extent) ==
+ efip->efi_format.efi_nextents);
efip->efi_format.efi_type = XFS_LI_EFI;
@@ -99,10 +122,12 @@ xfs_efi_item_pin(
}
/*
- * While EFIs cannot really be pinned, the unpin operation is the
- * last place at which the EFI is manipulated during a transaction.
- * Here we coordinate with xfs_efi_cancel() to determine who gets to
- * free the EFI.
+ * While EFIs cannot really be pinned, the unpin operation is the last place at
+ * which the EFI is manipulated during a transaction. If we are being asked to
+ * remove the EFI it's because the transaction has been cancelled and by
+ * definition that means the EFI cannot be in the AIL so remove it from the
+ * transaction and free it. Otherwise coordinate with xfs_efi_release() (via
+ * XFS_EFI_COMMITTED) to determine who gets to free the EFI.
*/
STATIC void
xfs_efi_item_unpin(
@@ -110,20 +135,14 @@ xfs_efi_item_unpin(
int remove)
{
struct xfs_efi_log_item *efip = EFI_ITEM(lip);
- struct xfs_ail *ailp = lip->li_ailp;
-
- spin_lock(&ailp->xa_lock);
- if (efip->efi_flags & XFS_EFI_CANCELED) {
- if (remove)
- xfs_trans_del_item(lip);
- /* xfs_trans_ail_delete() drops the AIL lock. */
- xfs_trans_ail_delete(ailp, lip);
+ if (remove) {
+ ASSERT(!(lip->li_flags & XFS_LI_IN_AIL));
+ xfs_trans_del_item(lip);
xfs_efi_item_free(efip);
- } else {
- efip->efi_flags |= XFS_EFI_COMMITTED;
- spin_unlock(&ailp->xa_lock);
+ return;
}
+ __xfs_efi_release(efip);
}
/*
@@ -152,16 +171,20 @@ xfs_efi_item_unlock(
}
/*
- * The EFI is logged only once and cannot be moved in the log, so
- * simply return the lsn at which it's been logged. The canceled
- * flag is not paid any attention here. Checking for that is delayed
- * until the EFI is unpinned.
+ * The EFI is logged only once and cannot be moved in the log, so simply return
+ * the lsn at which it's been logged. For bulk transaction committed
+ * processing, the EFI may be processed but not yet unpinned prior to the EFD
+ * being processed. Set the XFS_EFI_COMMITTED flag so this case can be detected
+ * when processing the EFD.
*/
STATIC xfs_lsn_t
xfs_efi_item_committed(
struct xfs_log_item *lip,
xfs_lsn_t lsn)
{
+ struct xfs_efi_log_item *efip = EFI_ITEM(lip);
+
+ set_bit(XFS_EFI_COMMITTED, &efip->efi_flags);
return lsn;
}
@@ -230,6 +253,7 @@ xfs_efi_init(
xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
efip->efi_format.efi_nextents = nextents;
efip->efi_format.efi_id = (__psint_t)(void*)efip;
+ atomic_set(&efip->efi_next_extent, 0);
return efip;
}
@@ -289,37 +313,18 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
}
/*
- * This is called by the efd item code below to release references to
- * the given efi item. Each efd calls this with the number of
- * extents that it has logged, and when the sum of these reaches
- * the total number of extents logged by this efi item we can free
- * the efi item.
- *
- * Freeing the efi item requires that we remove it from the AIL.
- * We'll use the AIL lock to protect our counters as well as
- * the removal from the AIL.
+ * This is called by the efd item code below to release references to the given
+ * efi item. Each efd calls this with the number of extents that it has
+ * logged, and when the sum of these reaches the total number of extents logged
+ * by this efi item we can free the efi item.
*/
void
xfs_efi_release(xfs_efi_log_item_t *efip,
uint nextents)
{
- struct xfs_ail *ailp = efip->efi_item.li_ailp;
- int extents_left;
-
- ASSERT(efip->efi_next_extent > 0);
- ASSERT(efip->efi_flags & XFS_EFI_COMMITTED);
-
- spin_lock(&ailp->xa_lock);
- ASSERT(efip->efi_next_extent >= nextents);
- efip->efi_next_extent -= nextents;
- extents_left = efip->efi_next_extent;
- if (extents_left == 0) {
- /* xfs_trans_ail_delete() drops the AIL lock. */
- xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
- xfs_efi_item_free(efip);
- } else {
- spin_unlock(&ailp->xa_lock);
- }
+ ASSERT(atomic_read(&efip->efi_next_extent) >= nextents);
+ if (atomic_sub_and_test(nextents, &efip->efi_next_extent))
+ __xfs_efi_release(efip);
}
static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip)
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 0d22c56..375f68e 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -111,11 +111,10 @@ typedef struct xfs_efd_log_format_64 {
#define XFS_EFI_MAX_FAST_EXTENTS 16
/*
- * Define EFI flags.
+ * Define EFI flag bits. Manipulated by set/clear/test_bit operators.
*/
-#define XFS_EFI_RECOVERED 0x1
-#define XFS_EFI_COMMITTED 0x2
-#define XFS_EFI_CANCELED 0x4
+#define XFS_EFI_RECOVERED 1
+#define XFS_EFI_COMMITTED 2
/*
* This is the "extent free intention" log item. It is used
@@ -125,8 +124,8 @@ typedef struct xfs_efd_log_format_64 {
*/
typedef struct xfs_efi_log_item {
xfs_log_item_t efi_item;
- uint efi_flags; /* misc flags */
- uint efi_next_extent;
+ atomic_t efi_next_extent;
+ unsigned long efi_flags; /* misc flags */
xfs_efi_log_format_t efi_format;
} xfs_efi_log_item_t;
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 9b715dc..9124425 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -744,9 +744,15 @@ xfs_filestream_new_ag(
* If the file's parent directory is known, take its iolock in exclusive
* mode to prevent two sibling files from racing each other to migrate
* themselves and their parent to different AGs.
+ *
+ * Note that we lock the parent directory iolock inside the child
+ * iolock here. That's fine as we never hold both parent and child
+ * iolock in any other place. This is different from the ilock,
+ * which requires locking of the child after the parent for namespace
+ * operations.
*/
if (pip)
- xfs_ilock(pip, XFS_IOLOCK_EXCL);
+ xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
/*
* A new AG needs to be found for the file. If the file's parent
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index a7c116e..f56d30e 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -374,6 +374,7 @@ xfs_growfs_data_private(
mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
} else
mp->m_maxicount = 0;
+ xfs_set_low_space_thresholds(mp);
/* update secondary superblocks. */
for (agno = 1; agno < nagcount; agno++) {
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0cdd269..cb9b6d1 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -43,6 +43,17 @@
/*
+ * Define xfs inode iolock lockdep classes. We need to ensure that all active
+ * inodes are considered the same for lockdep purposes, including inodes that
+ * are recycled through the XFS_IRECLAIMABLE state. This is the the only way to
+ * guarantee the locks are considered the same when there are multiple lock
+ * initialisation siteѕ. Also, define a reclaimable inode class so it is
+ * obvious in lockdep reports which class the report is against.
+ */
+static struct lock_class_key xfs_iolock_active;
+struct lock_class_key xfs_iolock_reclaimable;
+
+/*
* Allocate and initialise an xfs_inode.
*/
STATIC struct xfs_inode *
@@ -69,8 +80,11 @@ xfs_inode_alloc(
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
+ ASSERT(ip->i_ino == 0);
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
+ lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
+ &xfs_iolock_active, "xfs_iolock_active");
/* initialise the xfs inode */
ip->i_ino = ino;
@@ -85,12 +99,20 @@ xfs_inode_alloc(
ip->i_size = 0;
ip->i_new_size = 0;
- /* prevent anyone from using this yet */
- VFS_I(ip)->i_state = I_NEW;
-
return ip;
}
+STATIC void
+xfs_inode_free_callback(
+ struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct xfs_inode *ip = XFS_I(inode);
+
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_zone_free(xfs_inode_zone, ip);
+}
+
void
xfs_inode_free(
struct xfs_inode *ip)
@@ -134,7 +156,18 @@ xfs_inode_free(
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
- kmem_zone_free(xfs_inode_zone, ip);
+ /*
+ * Because we use RCU freeing we need to ensure the inode always
+ * appears to be reclaimed with an invalid inode number when in the
+ * free state. The ip->i_flags_lock provides the barrier against lookup
+ * races.
+ */
+ spin_lock(&ip->i_flags_lock);
+ ip->i_flags = XFS_IRECLAIM;
+ ip->i_ino = 0;
+ spin_unlock(&ip->i_flags_lock);
+
+ call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
}
/*
@@ -144,14 +177,29 @@ static int
xfs_iget_cache_hit(
struct xfs_perag *pag,
struct xfs_inode *ip,
+ xfs_ino_t ino,
int flags,
- int lock_flags) __releases(pag->pag_ici_lock)
+ int lock_flags) __releases(RCU)
{
struct inode *inode = VFS_I(ip);
struct xfs_mount *mp = ip->i_mount;
int error;
+ /*
+ * check for re-use of an inode within an RCU grace period due to the
+ * radix tree nodes not being updated yet. We monitor for this by
+ * setting the inode number to zero before freeing the inode structure.
+ * If the inode has been reallocated and set up, then the inode number
+ * will not match, so check for that, too.
+ */
spin_lock(&ip->i_flags_lock);
+ if (ip->i_ino != ino) {
+ trace_xfs_iget_skip(ip);
+ XFS_STATS_INC(xs_ig_frecycle);
+ error = EAGAIN;
+ goto out_error;
+ }
+
/*
* If we are racing with another cache hit that is currently
@@ -194,7 +242,7 @@ xfs_iget_cache_hit(
ip->i_flags |= XFS_IRECLAIM;
spin_unlock(&ip->i_flags_lock);
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
error = -inode_init_always(mp->m_super, inode);
if (error) {
@@ -202,7 +250,7 @@ xfs_iget_cache_hit(
* Re-initializing the inode failed, and we are in deep
* trouble. Try to re-add it to the reclaim list.
*/
- read_lock(&pag->pag_ici_lock);
+ rcu_read_lock();
spin_lock(&ip->i_flags_lock);
ip->i_flags &= ~XFS_INEW;
@@ -212,14 +260,20 @@ xfs_iget_cache_hit(
goto out_error;
}
- write_lock(&pag->pag_ici_lock);
+ spin_lock(&pag->pag_ici_lock);
spin_lock(&ip->i_flags_lock);
ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM);
ip->i_flags |= XFS_INEW;
__xfs_inode_clear_reclaim_tag(mp, pag, ip);
inode->i_state = I_NEW;
+
+ ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+ mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
+ lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
+ &xfs_iolock_active, "xfs_iolock_active");
+
spin_unlock(&ip->i_flags_lock);
- write_unlock(&pag->pag_ici_lock);
+ spin_unlock(&pag->pag_ici_lock);
} else {
/* If the VFS inode is being torn down, pause and try again. */
if (!igrab(inode)) {
@@ -230,7 +284,7 @@ xfs_iget_cache_hit(
/* We've got a live one. */
spin_unlock(&ip->i_flags_lock);
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
trace_xfs_iget_hit(ip);
}
@@ -244,7 +298,7 @@ xfs_iget_cache_hit(
out_error:
spin_unlock(&ip->i_flags_lock);
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
return error;
}
@@ -297,7 +351,7 @@ xfs_iget_cache_miss(
BUG();
}
- write_lock(&pag->pag_ici_lock);
+ spin_lock(&pag->pag_ici_lock);
/* insert the new inode */
error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
@@ -312,14 +366,14 @@ xfs_iget_cache_miss(
ip->i_udquot = ip->i_gdquot = NULL;
xfs_iflags_set(ip, XFS_INEW);
- write_unlock(&pag->pag_ici_lock);
+ spin_unlock(&pag->pag_ici_lock);
radix_tree_preload_end();
*ipp = ip;
return 0;
out_preload_end:
- write_unlock(&pag->pag_ici_lock);
+ spin_unlock(&pag->pag_ici_lock);
radix_tree_preload_end();
if (lock_flags)
xfs_iunlock(ip, lock_flags);
@@ -366,7 +420,7 @@ xfs_iget(
xfs_agino_t agino;
/* reject inode numbers outside existing AGs */
- if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
+ if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
return EINVAL;
/* get the perag structure and ensure that it's inode capable */
@@ -375,15 +429,15 @@ xfs_iget(
again:
error = 0;
- read_lock(&pag->pag_ici_lock);
+ rcu_read_lock();
ip = radix_tree_lookup(&pag->pag_ici_root, agino);
if (ip) {
- error = xfs_iget_cache_hit(pag, ip, flags, lock_flags);
+ error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags);
if (error)
goto out_error_or_again;
} else {
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
XFS_STATS_INC(xs_ig_missed);
error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 108c7a0..be7cf62 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -887,7 +887,7 @@ xfs_iread(
* around for a while. This helps to keep recently accessed
* meta-data in-core longer.
*/
- XFS_BUF_SET_REF(bp, XFS_INO_REF);
+ xfs_buf_set_ref(bp, XFS_INO_REF);
/*
* Use xfs_trans_brelse() to release the buffer containing the
@@ -2000,17 +2000,33 @@ xfs_ifree_cluster(
*/
for (i = 0; i < ninodes; i++) {
retry:
- read_lock(&pag->pag_ici_lock);
+ rcu_read_lock();
ip = radix_tree_lookup(&pag->pag_ici_root,
XFS_INO_TO_AGINO(mp, (inum + i)));
- /* Inode not in memory or stale, nothing to do */
- if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
- read_unlock(&pag->pag_ici_lock);
+ /* Inode not in memory, nothing to do */
+ if (!ip) {
+ rcu_read_unlock();
continue;
}
/*
+ * because this is an RCU protected lookup, we could
+ * find a recently freed or even reallocated inode
+ * during the lookup. We need to check under the
+ * i_flags_lock for a valid inode here. Skip it if it
+ * is not valid, the wrong inode or stale.
+ */
+ spin_lock(&ip->i_flags_lock);
+ if (ip->i_ino != inum + i ||
+ __xfs_iflags_test(ip, XFS_ISTALE)) {
+ spin_unlock(&ip->i_flags_lock);
+ rcu_read_unlock();
+ continue;
+ }
+ spin_unlock(&ip->i_flags_lock);
+
+ /*
* Don't try to lock/unlock the current inode, but we
* _cannot_ skip the other inodes that we did not find
* in the list attached to the buffer and are not
@@ -2019,11 +2035,11 @@ retry:
*/
if (ip != free_ip &&
!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
delay(1);
goto retry;
}
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
xfs_iflock(ip);
xfs_iflags_set(ip, XFS_ISTALE);
@@ -2629,7 +2645,7 @@ xfs_iflush_cluster(
mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
- read_lock(&pag->pag_ici_lock);
+ rcu_read_lock();
/* really need a gang lookup range call here */
nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
first_index, inodes_per_cluster);
@@ -2640,9 +2656,21 @@ xfs_iflush_cluster(
iq = ilist[i];
if (iq == ip)
continue;
- /* if the inode lies outside this cluster, we're done. */
- if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index)
- break;
+
+ /*
+ * because this is an RCU protected lookup, we could find a
+ * recently freed or even reallocated inode during the lookup.
+ * We need to check under the i_flags_lock for a valid inode
+ * here. Skip it if it is not valid or the wrong inode.
+ */
+ spin_lock(&ip->i_flags_lock);
+ if (!ip->i_ino ||
+ (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
+ spin_unlock(&ip->i_flags_lock);
+ continue;
+ }
+ spin_unlock(&ip->i_flags_lock);
+
/*
* Do an un-protected check to see if the inode is dirty and
* is a candidate for flushing. These checks will be repeated
@@ -2692,7 +2720,7 @@ xfs_iflush_cluster(
}
out_free:
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
kmem_free(ilist);
out_put:
xfs_perag_put(pag);
@@ -2704,7 +2732,7 @@ cluster_corrupt_out:
* Corruption detected in the clustering loop. Invalidate the
* inode buffer and shut down the filesystem.
*/
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
/*
* Clean up the buffer. If it was B_DELWRI, just release it --
* brelse can handle it with no problems. If not, shut down the
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index fb2ca2e..5c95fa8 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -376,12 +376,13 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
/*
* In-core inode flags.
*/
-#define XFS_IRECLAIM 0x0001 /* we have started reclaiming this inode */
-#define XFS_ISTALE 0x0002 /* inode has been staled */
-#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */
-#define XFS_INEW 0x0008 /* inode has just been allocated */
-#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */
-#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */
+#define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */
+#define XFS_ISTALE 0x0002 /* inode has been staled */
+#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */
+#define XFS_INEW 0x0008 /* inode has just been allocated */
+#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */
+#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */
+#define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */
/*
* Flags for inode locking.
@@ -438,6 +439,8 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
+extern struct lock_class_key xfs_iolock_reclaimable;
+
/*
* Flags for xfs_itruncate_start().
*/
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index c7ac020..fd4f398 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -657,18 +657,37 @@ xfs_inode_item_unlock(
}
/*
- * This is called to find out where the oldest active copy of the
- * inode log item in the on disk log resides now that the last log
- * write of it completed at the given lsn. Since we always re-log
- * all dirty data in an inode, the latest copy in the on disk log
- * is the only one that matters. Therefore, simply return the
- * given lsn.
+ * This is called to find out where the oldest active copy of the inode log
+ * item in the on disk log resides now that the last log write of it completed
+ * at the given lsn. Since we always re-log all dirty data in an inode, the
+ * latest copy in the on disk log is the only one that matters. Therefore,
+ * simply return the given lsn.
+ *
+ * If the inode has been marked stale because the cluster is being freed, we
+ * don't want to (re-)insert this inode into the AIL. There is a race condition
+ * where the cluster buffer may be unpinned before the inode is inserted into
+ * the AIL during transaction committed processing. If the buffer is unpinned
+ * before the inode item has been committed and inserted, then it is possible
+ * for the buffer to be written and IO completions before the inode is inserted
+ * into the AIL. In that case, we'd be inserting a clean, stale inode into the
+ * AIL which will never get removed. It will, however, get reclaimed which
+ * triggers an assert in xfs_inode_free() complaining about freein an inode
+ * still in the AIL.
+ *
+ * To avoid this, return a lower LSN than the one passed in so that the
+ * transaction committed code will not move the inode forward in the AIL but
+ * will still unpin it properly.
*/
STATIC xfs_lsn_t
xfs_inode_item_committed(
struct xfs_log_item *lip,
xfs_lsn_t lsn)
{
+ struct xfs_inode_log_item *iip = INODE_ITEM(lip);
+ struct xfs_inode *ip = iip->ili_inode;
+
+ if (xfs_iflags_test(ip, XFS_ISTALE))
+ return lsn - 1;
return lsn;
}
@@ -823,15 +842,64 @@ xfs_inode_item_destroy(
* flushed to disk. It is responsible for removing the inode item
* from the AIL if it has not been re-logged, and unlocking the inode's
* flush lock.
+ *
+ * To reduce AIL lock traffic as much as possible, we scan the buffer log item
+ * list for other inodes that will run this function. We remove them from the
+ * buffer list so we can process all the inode IO completions in one AIL lock
+ * traversal.
*/
void
xfs_iflush_done(
struct xfs_buf *bp,
struct xfs_log_item *lip)
{
- struct xfs_inode_log_item *iip = INODE_ITEM(lip);
- xfs_inode_t *ip = iip->ili_inode;
+ struct xfs_inode_log_item *iip;
+ struct xfs_log_item *blip;
+ struct xfs_log_item *next;
+ struct xfs_log_item *prev;
struct xfs_ail *ailp = lip->li_ailp;
+ int need_ail = 0;
+
+ /*
+ * Scan the buffer IO completions for other inodes being completed and
+ * attach them to the current inode log item.
+ */
+ blip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+ prev = NULL;
+ while (blip != NULL) {
+ if (lip->li_cb != xfs_iflush_done) {
+ prev = blip;
+ blip = blip->li_bio_list;
+ continue;
+ }
+
+ /* remove from list */
+ next = blip->li_bio_list;
+ if (!prev) {
+ XFS_BUF_SET_FSPRIVATE(bp, next);
+ } else {
+ prev->li_bio_list = next;
+ }
+
+ /* add to current list */
+ blip->li_bio_list = lip->li_bio_list;
+ lip->li_bio_list = blip;
+
+ /*
+ * while we have the item, do the unlocked check for needing
+ * the AIL lock.
+ */
+ iip = INODE_ITEM(blip);
+ if (iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn)
+ need_ail++;
+
+ blip = next;
+ }
+
+ /* make sure we capture the state of the initial inode. */
+ iip = INODE_ITEM(lip);
+ if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn)
+ need_ail++;
/*
* We only want to pull the item from the AIL if it is
@@ -842,28 +910,37 @@ xfs_iflush_done(
* the lock since it's cheaper, and then we recheck while
* holding the lock before removing the inode from the AIL.
*/
- if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) {
+ if (need_ail) {
+ struct xfs_log_item *log_items[need_ail];
+ int i = 0;
spin_lock(&ailp->xa_lock);
- if (lip->li_lsn == iip->ili_flush_lsn) {
- /* xfs_trans_ail_delete() drops the AIL lock. */
- xfs_trans_ail_delete(ailp, lip);
- } else {
- spin_unlock(&ailp->xa_lock);
+ for (blip = lip; blip; blip = blip->li_bio_list) {
+ iip = INODE_ITEM(blip);
+ if (iip->ili_logged &&
+ blip->li_lsn == iip->ili_flush_lsn) {
+ log_items[i++] = blip;
+ }
+ ASSERT(i <= need_ail);
}
+ /* xfs_trans_ail_delete_bulk() drops the AIL lock. */
+ xfs_trans_ail_delete_bulk(ailp, log_items, i);
}
- iip->ili_logged = 0;
/*
- * Clear the ili_last_fields bits now that we know that the
- * data corresponding to them is safely on disk.
+ * clean up and unlock the flush lock now we are done. We can clear the
+ * ili_last_fields bits now that we know that the data corresponding to
+ * them is safely on disk.
*/
- iip->ili_last_fields = 0;
+ for (blip = lip; blip; blip = next) {
+ next = blip->li_bio_list;
+ blip->li_bio_list = NULL;
- /*
- * Release the inode's flush lock since we're done with it.
- */
- xfs_ifunlock(ip);
+ iip = INODE_ITEM(blip);
+ iip->ili_logged = 0;
+ iip->ili_last_fields = 0;
+ xfs_ifunlock(iip->ili_inode);
+ }
}
/*
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 2057614..55582bd 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -47,127 +47,8 @@
#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \
<< mp->m_writeio_log)
-#define XFS_STRAT_WRITE_IMAPS 2
#define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP
-STATIC int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
- int, struct xfs_bmbt_irec *, int *);
-STATIC int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int,
- struct xfs_bmbt_irec *, int *);
-STATIC int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t,
- struct xfs_bmbt_irec *, int *);
-
-int
-xfs_iomap(
- struct xfs_inode *ip,
- xfs_off_t offset,
- ssize_t count,
- int flags,
- struct xfs_bmbt_irec *imap,
- int *nimaps,
- int *new)
-{
- struct xfs_mount *mp = ip->i_mount;
- xfs_fileoff_t offset_fsb, end_fsb;
- int error = 0;
- int lockmode = 0;
- int bmapi_flags = 0;
-
- ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
-
- *new = 0;
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
-
- trace_xfs_iomap_enter(ip, offset, count, flags, NULL);
-
- switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) {
- case BMAPI_READ:
- lockmode = xfs_ilock_map_shared(ip);
- bmapi_flags = XFS_BMAPI_ENTIRE;
- break;
- case BMAPI_WRITE:
- lockmode = XFS_ILOCK_EXCL;
- if (flags & BMAPI_IGNSTATE)
- bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
- xfs_ilock(ip, lockmode);
- break;
- case BMAPI_ALLOCATE:
- lockmode = XFS_ILOCK_SHARED;
- bmapi_flags = XFS_BMAPI_ENTIRE;
-
- /* Attempt non-blocking lock */
- if (flags & BMAPI_TRYLOCK) {
- if (!xfs_ilock_nowait(ip, lockmode))
- return XFS_ERROR(EAGAIN);
- } else {
- xfs_ilock(ip, lockmode);
- }
- break;
- default:
- BUG();
- }
-
- ASSERT(offset <= mp->m_maxioffset);
- if ((xfs_fsize_t)offset + count > mp->m_maxioffset)
- count = mp->m_maxioffset - offset;
- end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
- offset_fsb = XFS_B_TO_FSBT(mp, offset);
-
- error = xfs_bmapi(NULL, ip, offset_fsb,
- (xfs_filblks_t)(end_fsb - offset_fsb),
- bmapi_flags, NULL, 0, imap,
- nimaps, NULL);
-
- if (error)
- goto out;
-
- switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) {
- case BMAPI_WRITE:
- /* If we found an extent, return it */
- if (*nimaps &&
- (imap->br_startblock != HOLESTARTBLOCK) &&
- (imap->br_startblock != DELAYSTARTBLOCK)) {
- trace_xfs_iomap_found(ip, offset, count, flags, imap);
- break;
- }
-
- if (flags & BMAPI_DIRECT) {
- error = xfs_iomap_write_direct(ip, offset, count, flags,
- imap, nimaps);
- } else {
- error = xfs_iomap_write_delay(ip, offset, count, flags,
- imap, nimaps);
- }
- if (!error) {
- trace_xfs_iomap_alloc(ip, offset, count, flags, imap);
- }
- *new = 1;
- break;
- case BMAPI_ALLOCATE:
- /* If we found an extent, return it */
- xfs_iunlock(ip, lockmode);
- lockmode = 0;
-
- if (*nimaps && !isnullstartblock(imap->br_startblock)) {
- trace_xfs_iomap_found(ip, offset, count, flags, imap);
- break;
- }
-
- error = xfs_iomap_write_allocate(ip, offset, count,
- imap, nimaps);
- break;
- }
-
- ASSERT(*nimaps <= 1);
-
-out:
- if (lockmode)
- xfs_iunlock(ip, lockmode);
- return XFS_ERROR(error);
-}
-
STATIC int
xfs_iomap_eof_align_last_fsb(
xfs_mount_t *mp,
@@ -236,14 +117,13 @@ xfs_cmn_err_fsblock_zero(
return EFSCORRUPTED;
}
-STATIC int
+int
xfs_iomap_write_direct(
xfs_inode_t *ip,
xfs_off_t offset,
size_t count,
- int flags,
xfs_bmbt_irec_t *imap,
- int *nmaps)
+ int nmaps)
{
xfs_mount_t *mp = ip->i_mount;
xfs_fileoff_t offset_fsb;
@@ -279,7 +159,7 @@ xfs_iomap_write_direct(
if (error)
goto error_out;
} else {
- if (*nmaps && (imap->br_startblock == HOLESTARTBLOCK))
+ if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
last_fsb = MIN(last_fsb, (xfs_fileoff_t)
imap->br_blockcount +
imap->br_startoff);
@@ -331,7 +211,7 @@ xfs_iomap_write_direct(
xfs_trans_ijoin(tp, ip);
bmapi_flag = XFS_BMAPI_WRITE;
- if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz))
+ if (offset < ip->i_size || extsz)
bmapi_flag |= XFS_BMAPI_PREALLOC;
/*
@@ -370,7 +250,6 @@ xfs_iomap_write_direct(
goto error_out;
}
- *nmaps = 1;
return 0;
error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
@@ -379,7 +258,6 @@ error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
error1: /* Just cancel transaction */
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
- *nmaps = 0; /* nothing set-up here */
error_out:
return XFS_ERROR(error);
@@ -389,6 +267,9 @@ error_out:
* If the caller is doing a write at the end of the file, then extend the
* allocation out to the file system's write iosize. We clean up any extra
* space left over when the file is closed in xfs_inactive().
+ *
+ * If we find we already have delalloc preallocation beyond EOF, don't do more
+ * preallocation as it it not needed.
*/
STATIC int
xfs_iomap_eof_want_preallocate(
@@ -396,7 +277,6 @@ xfs_iomap_eof_want_preallocate(
xfs_inode_t *ip,
xfs_off_t offset,
size_t count,
- int ioflag,
xfs_bmbt_irec_t *imap,
int nimaps,
int *prealloc)
@@ -405,6 +285,7 @@ xfs_iomap_eof_want_preallocate(
xfs_filblks_t count_fsb;
xfs_fsblock_t firstblock;
int n, error, imaps;
+ int found_delalloc = 0;
*prealloc = 0;
if ((offset + count) <= ip->i_size)
@@ -429,20 +310,66 @@ xfs_iomap_eof_want_preallocate(
return 0;
start_fsb += imap[n].br_blockcount;
count_fsb -= imap[n].br_blockcount;
+
+ if (imap[n].br_startblock == DELAYSTARTBLOCK)
+ found_delalloc = 1;
}
}
- *prealloc = 1;
+ if (!found_delalloc)
+ *prealloc = 1;
return 0;
}
-STATIC int
+/*
+ * If we don't have a user specified preallocation size, dynamically increase
+ * the preallocation size as the size of the file grows. Cap the maximum size
+ * at a single extent or less if the filesystem is near full. The closer the
+ * filesystem is to full, the smaller the maximum prealocation.
+ */
+STATIC xfs_fsblock_t
+xfs_iomap_prealloc_size(
+ struct xfs_mount *mp,
+ struct xfs_inode *ip)
+{
+ xfs_fsblock_t alloc_blocks = 0;
+
+ if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
+ int shift = 0;
+ int64_t freesp;
+
+ alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size);
+ alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
+ rounddown_pow_of_two(alloc_blocks));
+
+ xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
+ freesp = mp->m_sb.sb_fdblocks;
+ if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
+ shift = 2;
+ if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
+ shift++;
+ if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
+ shift++;
+ if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
+ shift++;
+ if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
+ shift++;
+ }
+ if (shift)
+ alloc_blocks >>= shift;
+ }
+
+ if (alloc_blocks < mp->m_writeio_blocks)
+ alloc_blocks = mp->m_writeio_blocks;
+
+ return alloc_blocks;
+}
+
+int
xfs_iomap_write_delay(
xfs_inode_t *ip,
xfs_off_t offset,
size_t count,
- int ioflag,
- xfs_bmbt_irec_t *ret_imap,
- int *nmaps)
+ xfs_bmbt_irec_t *ret_imap)
{
xfs_mount_t *mp = ip->i_mount;
xfs_fileoff_t offset_fsb;
@@ -469,16 +396,19 @@ xfs_iomap_write_delay(
extsz = xfs_get_extsz_hint(ip);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
- ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
+ imap, XFS_WRITE_IMAPS, &prealloc);
if (error)
return error;
retry:
if (prealloc) {
+ xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip);
+
aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
- last_fsb = ioalign + mp->m_writeio_blocks;
+ last_fsb = ioalign + alloc_blocks;
} else {
last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
}
@@ -496,22 +426,31 @@ retry:
XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
&nimaps, NULL);
- if (error && (error != ENOSPC))
+ switch (error) {
+ case 0:
+ case ENOSPC:
+ case EDQUOT:
+ break;
+ default:
return XFS_ERROR(error);
+ }
/*
- * If bmapi returned us nothing, and if we didn't get back EDQUOT,
- * then we must have run out of space - flush all other inodes with
- * delalloc blocks and retry without EOF preallocation.
+ * If bmapi returned us nothing, we got either ENOSPC or EDQUOT. For
+ * ENOSPC, * flush all other inodes with delalloc blocks to free up
+ * some of the excess reserved metadata space. For both cases, retry
+ * without EOF preallocation.
*/
if (nimaps == 0) {
trace_xfs_delalloc_enospc(ip, offset, count);
if (flushed)
- return XFS_ERROR(ENOSPC);
+ return XFS_ERROR(error ? error : ENOSPC);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_flush_inodes(ip);
- xfs_ilock(ip, XFS_ILOCK_EXCL);
+ if (error == ENOSPC) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_flush_inodes(ip);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ }
flushed = 1;
error = 0;
@@ -523,8 +462,6 @@ retry:
return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
*ret_imap = imap[0];
- *nmaps = 1;
-
return 0;
}
@@ -538,13 +475,12 @@ retry:
* We no longer bother to look at the incoming map - all we have to
* guarantee is that whatever we allocate fills the required range.
*/
-STATIC int
+int
xfs_iomap_write_allocate(
xfs_inode_t *ip,
xfs_off_t offset,
size_t count,
- xfs_bmbt_irec_t *imap,
- int *retmap)
+ xfs_bmbt_irec_t *imap)
{
xfs_mount_t *mp = ip->i_mount;
xfs_fileoff_t offset_fsb, last_block;
@@ -557,8 +493,6 @@ xfs_iomap_write_allocate(
int error = 0;
int nres;
- *retmap = 0;
-
/*
* Make sure that the dquots are there.
*/
@@ -680,7 +614,6 @@ xfs_iomap_write_allocate(
if ((offset_fsb >= imap->br_startoff) &&
(offset_fsb < (imap->br_startoff +
imap->br_blockcount))) {
- *retmap = 1;
XFS_STATS_INC(xs_xstrat_quick);
return 0;
}
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 7748a43..8061576 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -18,30 +18,15 @@
#ifndef __XFS_IOMAP_H__
#define __XFS_IOMAP_H__
-/* base extent manipulation calls */
-#define BMAPI_READ (1 << 0) /* read extents */
-#define BMAPI_WRITE (1 << 1) /* create extents */
-#define BMAPI_ALLOCATE (1 << 2) /* delayed allocate to real extents */
-
-/* modifiers */
-#define BMAPI_IGNSTATE (1 << 4) /* ignore unwritten state on read */
-#define BMAPI_DIRECT (1 << 5) /* direct instead of buffered write */
-#define BMAPI_MMA (1 << 6) /* allocate for mmap write */
-#define BMAPI_TRYLOCK (1 << 7) /* non-blocking request */
-
-#define BMAPI_FLAGS \
- { BMAPI_READ, "READ" }, \
- { BMAPI_WRITE, "WRITE" }, \
- { BMAPI_ALLOCATE, "ALLOCATE" }, \
- { BMAPI_IGNSTATE, "IGNSTATE" }, \
- { BMAPI_DIRECT, "DIRECT" }, \
- { BMAPI_TRYLOCK, "TRYLOCK" }
-
struct xfs_inode;
struct xfs_bmbt_irec;
-extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int,
- struct xfs_bmbt_irec *, int *, int *);
+extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
+ struct xfs_bmbt_irec *, int);
+extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t,
+ struct xfs_bmbt_irec *);
+extern int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t,
+ struct xfs_bmbt_irec *);
extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t);
#endif /* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index cee4ab9..0bf24b1 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -47,7 +47,7 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
xfs_buftarg_t *log_target,
xfs_daddr_t blk_offset,
int num_bblks);
-STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes);
+STATIC int xlog_space_left(struct log *log, atomic64_t *head);
STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
STATIC void xlog_dealloc_log(xlog_t *log);
@@ -70,7 +70,7 @@ STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
/* local functions to manipulate grant head */
STATIC int xlog_grant_log_space(xlog_t *log,
xlog_ticket_t *xtic);
-STATIC void xlog_grant_push_ail(xfs_mount_t *mp,
+STATIC void xlog_grant_push_ail(struct log *log,
int need_bytes);
STATIC void xlog_regrant_reserve_log_space(xlog_t *log,
xlog_ticket_t *ticket);
@@ -81,98 +81,73 @@ STATIC void xlog_ungrant_log_space(xlog_t *log,
#if defined(DEBUG)
STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr);
-STATIC void xlog_verify_grant_head(xlog_t *log, int equals);
+STATIC void xlog_verify_grant_tail(struct log *log);
STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
int count, boolean_t syncing);
STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
xfs_lsn_t tail_lsn);
#else
#define xlog_verify_dest_ptr(a,b)
-#define xlog_verify_grant_head(a,b)
+#define xlog_verify_grant_tail(a)
#define xlog_verify_iclog(a,b,c,d)
#define xlog_verify_tail_lsn(a,b,c)
#endif
STATIC int xlog_iclogs_empty(xlog_t *log);
-
static void
-xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
+xlog_grant_sub_space(
+ struct log *log,
+ atomic64_t *head,
+ int bytes)
{
- if (*qp) {
- tic->t_next = (*qp);
- tic->t_prev = (*qp)->t_prev;
- (*qp)->t_prev->t_next = tic;
- (*qp)->t_prev = tic;
- } else {
- tic->t_prev = tic->t_next = tic;
- *qp = tic;
- }
+ int64_t head_val = atomic64_read(head);
+ int64_t new, old;
- tic->t_flags |= XLOG_TIC_IN_Q;
-}
+ do {
+ int cycle, space;
-static void
-xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
-{
- if (tic == tic->t_next) {
- *qp = NULL;
- } else {
- *qp = tic->t_next;
- tic->t_next->t_prev = tic->t_prev;
- tic->t_prev->t_next = tic->t_next;
- }
+ xlog_crack_grant_head_val(head_val, &cycle, &space);
- tic->t_next = tic->t_prev = NULL;
- tic->t_flags &= ~XLOG_TIC_IN_Q;
+ space -= bytes;
+ if (space < 0) {
+ space += log->l_logsize;
+ cycle--;
+ }
+
+ old = head_val;
+ new = xlog_assign_grant_head_val(cycle, space);
+ head_val = atomic64_cmpxchg(head, old, new);
+ } while (head_val != old);
}
static void
-xlog_grant_sub_space(struct log *log, int bytes)
+xlog_grant_add_space(
+ struct log *log,
+ atomic64_t *head,
+ int bytes)
{
- log->l_grant_write_bytes -= bytes;
- if (log->l_grant_write_bytes < 0) {
- log->l_grant_write_bytes += log->l_logsize;
- log->l_grant_write_cycle--;
- }
-
- log->l_grant_reserve_bytes -= bytes;
- if ((log)->l_grant_reserve_bytes < 0) {
- log->l_grant_reserve_bytes += log->l_logsize;
- log->l_grant_reserve_cycle--;
- }
+ int64_t head_val = atomic64_read(head);
+ int64_t new, old;
-}
+ do {
+ int tmp;
+ int cycle, space;
-static void
-xlog_grant_add_space_write(struct log *log, int bytes)
-{
- int tmp = log->l_logsize - log->l_grant_write_bytes;
- if (tmp > bytes)
- log->l_grant_write_bytes += bytes;
- else {
- log->l_grant_write_cycle++;
- log->l_grant_write_bytes = bytes - tmp;
- }
-}
+ xlog_crack_grant_head_val(head_val, &cycle, &space);
-static void
-xlog_grant_add_space_reserve(struct log *log, int bytes)
-{
- int tmp = log->l_logsize - log->l_grant_reserve_bytes;
- if (tmp > bytes)
- log->l_grant_reserve_bytes += bytes;
- else {
- log->l_grant_reserve_cycle++;
- log->l_grant_reserve_bytes = bytes - tmp;
- }
-}
+ tmp = log->l_logsize - space;
+ if (tmp > bytes)
+ space += bytes;
+ else {
+ space = bytes - tmp;
+ cycle++;
+ }
-static inline void
-xlog_grant_add_space(struct log *log, int bytes)
-{
- xlog_grant_add_space_write(log, bytes);
- xlog_grant_add_space_reserve(log, bytes);
+ old = head_val;
+ new = xlog_assign_grant_head_val(cycle, space);
+ head_val = atomic64_cmpxchg(head, old, new);
+ } while (head_val != old);
}
static void
@@ -355,7 +330,7 @@ xfs_log_reserve(
trace_xfs_log_reserve(log, internal_ticket);
- xlog_grant_push_ail(mp, internal_ticket->t_unit_res);
+ xlog_grant_push_ail(log, internal_ticket->t_unit_res);
retval = xlog_regrant_write_log_space(log, internal_ticket);
} else {
/* may sleep if need to allocate more tickets */
@@ -369,7 +344,7 @@ xfs_log_reserve(
trace_xfs_log_reserve(log, internal_ticket);
- xlog_grant_push_ail(mp,
+ xlog_grant_push_ail(log,
(internal_ticket->t_unit_res *
internal_ticket->t_cnt));
retval = xlog_grant_log_space(log, internal_ticket);
@@ -584,8 +559,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
iclog->ic_state == XLOG_STATE_DIRTY)) {
if (!XLOG_FORCED_SHUTDOWN(log)) {
- sv_wait(&iclog->ic_force_wait, PMEM,
- &log->l_icloglock, s);
+ xlog_wait(&iclog->ic_force_wait,
+ &log->l_icloglock);
} else {
spin_unlock(&log->l_icloglock);
}
@@ -625,8 +600,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
|| iclog->ic_state == XLOG_STATE_DIRTY
|| iclog->ic_state == XLOG_STATE_IOERROR) ) {
- sv_wait(&iclog->ic_force_wait, PMEM,
- &log->l_icloglock, s);
+ xlog_wait(&iclog->ic_force_wait,
+ &log->l_icloglock);
} else {
spin_unlock(&log->l_icloglock);
}
@@ -703,55 +678,46 @@ xfs_log_move_tail(xfs_mount_t *mp,
{
xlog_ticket_t *tic;
xlog_t *log = mp->m_log;
- int need_bytes, free_bytes, cycle, bytes;
+ int need_bytes, free_bytes;
if (XLOG_FORCED_SHUTDOWN(log))
return;
- if (tail_lsn == 0) {
- /* needed since sync_lsn is 64 bits */
- spin_lock(&log->l_icloglock);
- tail_lsn = log->l_last_sync_lsn;
- spin_unlock(&log->l_icloglock);
- }
-
- spin_lock(&log->l_grant_lock);
+ if (tail_lsn == 0)
+ tail_lsn = atomic64_read(&log->l_last_sync_lsn);
- /* Also an invalid lsn. 1 implies that we aren't passing in a valid
- * tail_lsn.
- */
- if (tail_lsn != 1) {
- log->l_tail_lsn = tail_lsn;
- }
+ /* tail_lsn == 1 implies that we weren't passed a valid value. */
+ if (tail_lsn != 1)
+ atomic64_set(&log->l_tail_lsn, tail_lsn);
- if ((tic = log->l_write_headq)) {
+ if (!list_empty_careful(&log->l_writeq)) {
#ifdef DEBUG
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
panic("Recovery problem");
#endif
- cycle = log->l_grant_write_cycle;
- bytes = log->l_grant_write_bytes;
- free_bytes = xlog_space_left(log, cycle, bytes);
- do {
+ spin_lock(&log->l_grant_write_lock);
+ free_bytes = xlog_space_left(log, &log->l_grant_write_head);
+ list_for_each_entry(tic, &log->l_writeq, t_queue) {
ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
if (free_bytes < tic->t_unit_res && tail_lsn != 1)
break;
tail_lsn = 0;
free_bytes -= tic->t_unit_res;
- sv_signal(&tic->t_wait);
- tic = tic->t_next;
- } while (tic != log->l_write_headq);
+ trace_xfs_log_regrant_write_wake_up(log, tic);
+ wake_up(&tic->t_wait);
+ }
+ spin_unlock(&log->l_grant_write_lock);
}
- if ((tic = log->l_reserve_headq)) {
+
+ if (!list_empty_careful(&log->l_reserveq)) {
#ifdef DEBUG
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
panic("Recovery problem");
#endif
- cycle = log->l_grant_reserve_cycle;
- bytes = log->l_grant_reserve_bytes;
- free_bytes = xlog_space_left(log, cycle, bytes);
- do {
+ spin_lock(&log->l_grant_reserve_lock);
+ free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
+ list_for_each_entry(tic, &log->l_reserveq, t_queue) {
if (tic->t_flags & XLOG_TIC_PERM_RESERV)
need_bytes = tic->t_unit_res*tic->t_cnt;
else
@@ -760,12 +726,12 @@ xfs_log_move_tail(xfs_mount_t *mp,
break;
tail_lsn = 0;
free_bytes -= need_bytes;
- sv_signal(&tic->t_wait);
- tic = tic->t_next;
- } while (tic != log->l_reserve_headq);
+ trace_xfs_log_grant_wake_up(log, tic);
+ wake_up(&tic->t_wait);
+ }
+ spin_unlock(&log->l_grant_reserve_lock);
}
- spin_unlock(&log->l_grant_lock);
-} /* xfs_log_move_tail */
+}
/*
* Determine if we have a transaction that has gone to disk
@@ -831,23 +797,19 @@ xfs_log_need_covered(xfs_mount_t *mp)
* We may be holding the log iclog lock upon entering this routine.
*/
xfs_lsn_t
-xlog_assign_tail_lsn(xfs_mount_t *mp)
+xlog_assign_tail_lsn(
+ struct xfs_mount *mp)
{
- xfs_lsn_t tail_lsn;
- xlog_t *log = mp->m_log;
+ xfs_lsn_t tail_lsn;
+ struct log *log = mp->m_log;
tail_lsn = xfs_trans_ail_tail(mp->m_ail);
- spin_lock(&log->l_grant_lock);
- if (tail_lsn != 0) {
- log->l_tail_lsn = tail_lsn;
- } else {
- tail_lsn = log->l_tail_lsn = log->l_last_sync_lsn;
- }
- spin_unlock(&log->l_grant_lock);
+ if (!tail_lsn)
+ tail_lsn = atomic64_read(&log->l_last_sync_lsn);
+ atomic64_set(&log->l_tail_lsn, tail_lsn);
return tail_lsn;
-} /* xlog_assign_tail_lsn */
-
+}
/*
* Return the space in the log between the tail and the head. The head
@@ -864,21 +826,26 @@ xlog_assign_tail_lsn(xfs_mount_t *mp)
* result is that we return the size of the log as the amount of space left.
*/
STATIC int
-xlog_space_left(xlog_t *log, int cycle, int bytes)
-{
- int free_bytes;
- int tail_bytes;
- int tail_cycle;
-
- tail_bytes = BBTOB(BLOCK_LSN(log->l_tail_lsn));
- tail_cycle = CYCLE_LSN(log->l_tail_lsn);
- if ((tail_cycle == cycle) && (bytes >= tail_bytes)) {
- free_bytes = log->l_logsize - (bytes - tail_bytes);
- } else if ((tail_cycle + 1) < cycle) {
+xlog_space_left(
+ struct log *log,
+ atomic64_t *head)
+{
+ int free_bytes;
+ int tail_bytes;
+ int tail_cycle;
+ int head_cycle;
+ int head_bytes;
+
+ xlog_crack_grant_head(head, &head_cycle, &head_bytes);
+ xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes);
+ tail_bytes = BBTOB(tail_bytes);
+ if (tail_cycle == head_cycle && head_bytes >= tail_bytes)
+ free_bytes = log->l_logsize - (head_bytes - tail_bytes);
+ else if (tail_cycle + 1 < head_cycle)
return 0;
- } else if (tail_cycle < cycle) {
- ASSERT(tail_cycle == (cycle - 1));
- free_bytes = tail_bytes - bytes;
+ else if (tail_cycle < head_cycle) {
+ ASSERT(tail_cycle == (head_cycle - 1));
+ free_bytes = tail_bytes - head_bytes;
} else {
/*
* The reservation head is behind the tail.
@@ -889,12 +856,12 @@ xlog_space_left(xlog_t *log, int cycle, int bytes)
"xlog_space_left: head behind tail\n"
" tail_cycle = %d, tail_bytes = %d\n"
" GH cycle = %d, GH bytes = %d",
- tail_cycle, tail_bytes, cycle, bytes);
+ tail_cycle, tail_bytes, head_cycle, head_bytes);
ASSERT(0);
free_bytes = log->l_logsize;
}
return free_bytes;
-} /* xlog_space_left */
+}
/*
@@ -1047,12 +1014,16 @@ xlog_alloc_log(xfs_mount_t *mp,
log->l_flags |= XLOG_ACTIVE_RECOVERY;
log->l_prev_block = -1;
- log->l_tail_lsn = xlog_assign_lsn(1, 0);
/* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
- log->l_last_sync_lsn = log->l_tail_lsn;
+ xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0);
+ xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0);
log->l_curr_cycle = 1; /* 0 is bad since this is initial value */
- log->l_grant_reserve_cycle = 1;
- log->l_grant_write_cycle = 1;
+ xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0);
+ xlog_assign_grant_head(&log->l_grant_write_head, 1, 0);
+ INIT_LIST_HEAD(&log->l_reserveq);
+ INIT_LIST_HEAD(&log->l_writeq);
+ spin_lock_init(&log->l_grant_reserve_lock);
+ spin_lock_init(&log->l_grant_write_lock);
error = EFSCORRUPTED;
if (xfs_sb_version_hassector(&mp->m_sb)) {
@@ -1094,8 +1065,7 @@ xlog_alloc_log(xfs_mount_t *mp,
log->l_xbuf = bp;
spin_lock_init(&log->l_icloglock);
- spin_lock_init(&log->l_grant_lock);
- sv_init(&log->l_flush_wait, 0, "flush_wait");
+ init_waitqueue_head(&log->l_flush_wait);
/* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1151,8 +1121,8 @@ xlog_alloc_log(xfs_mount_t *mp,
ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
- sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
- sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
+ init_waitqueue_head(&iclog->ic_force_wait);
+ init_waitqueue_head(&iclog->ic_write_wait);
iclogp = &iclog->ic_next;
}
@@ -1167,15 +1137,11 @@ xlog_alloc_log(xfs_mount_t *mp,
out_free_iclog:
for (iclog = log->l_iclog; iclog; iclog = prev_iclog) {
prev_iclog = iclog->ic_next;
- if (iclog->ic_bp) {
- sv_destroy(&iclog->ic_force_wait);
- sv_destroy(&iclog->ic_write_wait);
+ if (iclog->ic_bp)
xfs_buf_free(iclog->ic_bp);
- }
kmem_free(iclog);
}
spinlock_destroy(&log->l_icloglock);
- spinlock_destroy(&log->l_grant_lock);
xfs_buf_free(log->l_xbuf);
out_free_log:
kmem_free(log);
@@ -1223,61 +1189,60 @@ xlog_commit_record(
* water mark. In this manner, we would be creating a low water mark.
*/
STATIC void
-xlog_grant_push_ail(xfs_mount_t *mp,
- int need_bytes)
+xlog_grant_push_ail(
+ struct log *log,
+ int need_bytes)
{
- xlog_t *log = mp->m_log; /* pointer to the log */
- xfs_lsn_t tail_lsn; /* lsn of the log tail */
- xfs_lsn_t threshold_lsn = 0; /* lsn we'd like to be at */
- int free_blocks; /* free blocks left to write to */
- int free_bytes; /* free bytes left to write to */
- int threshold_block; /* block in lsn we'd like to be at */
- int threshold_cycle; /* lsn cycle we'd like to be at */
- int free_threshold;
-
- ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
-
- spin_lock(&log->l_grant_lock);
- free_bytes = xlog_space_left(log,
- log->l_grant_reserve_cycle,
- log->l_grant_reserve_bytes);
- tail_lsn = log->l_tail_lsn;
- free_blocks = BTOBBT(free_bytes);
-
- /*
- * Set the threshold for the minimum number of free blocks in the
- * log to the maximum of what the caller needs, one quarter of the
- * log, and 256 blocks.
- */
- free_threshold = BTOBB(need_bytes);
- free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2));
- free_threshold = MAX(free_threshold, 256);
- if (free_blocks < free_threshold) {
- threshold_block = BLOCK_LSN(tail_lsn) + free_threshold;
- threshold_cycle = CYCLE_LSN(tail_lsn);
+ xfs_lsn_t threshold_lsn = 0;
+ xfs_lsn_t last_sync_lsn;
+ int free_blocks;
+ int free_bytes;
+ int threshold_block;
+ int threshold_cycle;
+ int free_threshold;
+
+ ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
+
+ free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
+ free_blocks = BTOBBT(free_bytes);
+
+ /*
+ * Set the threshold for the minimum number of free blocks in the
+ * log to the maximum of what the caller needs, one quarter of the
+ * log, and 256 blocks.
+ */
+ free_threshold = BTOBB(need_bytes);
+ free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2));
+ free_threshold = MAX(free_threshold, 256);
+ if (free_blocks >= free_threshold)
+ return;
+
+ xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle,
+ &threshold_block);
+ threshold_block += free_threshold;
if (threshold_block >= log->l_logBBsize) {
- threshold_block -= log->l_logBBsize;
- threshold_cycle += 1;
+ threshold_block -= log->l_logBBsize;
+ threshold_cycle += 1;
}
- threshold_lsn = xlog_assign_lsn(threshold_cycle, threshold_block);
+ threshold_lsn = xlog_assign_lsn(threshold_cycle,
+ threshold_block);
+ /*
+ * Don't pass in an lsn greater than the lsn of the last
+ * log record known to be on disk. Use a snapshot of the last sync lsn
+ * so that it doesn't change between the compare and the set.
+ */
+ last_sync_lsn = atomic64_read(&log->l_last_sync_lsn);
+ if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0)
+ threshold_lsn = last_sync_lsn;
- /* Don't pass in an lsn greater than the lsn of the last
- * log record known to be on disk.
+ /*
+ * Get the transaction layer to kick the dirty buffers out to
+ * disk asynchronously. No point in trying to do this if
+ * the filesystem is shutting down.
*/
- if (XFS_LSN_CMP(threshold_lsn, log->l_last_sync_lsn) > 0)
- threshold_lsn = log->l_last_sync_lsn;
- }
- spin_unlock(&log->l_grant_lock);
-
- /*
- * Get the transaction layer to kick the dirty buffers out to
- * disk asynchronously. No point in trying to do this if
- * the filesystem is shutting down.
- */
- if (threshold_lsn &&
- !XLOG_FORCED_SHUTDOWN(log))
- xfs_trans_ail_push(log->l_ailp, threshold_lsn);
-} /* xlog_grant_push_ail */
+ if (!XLOG_FORCED_SHUTDOWN(log))
+ xfs_trans_ail_push(log->l_ailp, threshold_lsn);
+}
/*
* The bdstrat callback function for log bufs. This gives us a central
@@ -1372,9 +1337,8 @@ xlog_sync(xlog_t *log,
roundoff < BBTOB(1)));
/* move grant heads by roundoff in sync */
- spin_lock(&log->l_grant_lock);
- xlog_grant_add_space(log, roundoff);
- spin_unlock(&log->l_grant_lock);
+ xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff);
+ xlog_grant_add_space(log, &log->l_grant_write_head, roundoff);
/* put cycle number in every block */
xlog_pack_data(log, iclog, roundoff);
@@ -1489,15 +1453,12 @@ xlog_dealloc_log(xlog_t *log)
iclog = log->l_iclog;
for (i=0; i<log->l_iclog_bufs; i++) {
- sv_destroy(&iclog->ic_force_wait);
- sv_destroy(&iclog->ic_write_wait);
xfs_buf_free(iclog->ic_bp);
next_iclog = iclog->ic_next;
kmem_free(iclog);
iclog = next_iclog;
}
spinlock_destroy(&log->l_icloglock);
- spinlock_destroy(&log->l_grant_lock);
xfs_buf_free(log->l_xbuf);
log->l_mp->m_log = NULL;
@@ -2232,7 +2193,7 @@ xlog_state_do_callback(
lowest_lsn = xlog_get_lowest_lsn(log);
if (lowest_lsn &&
XFS_LSN_CMP(lowest_lsn,
- be64_to_cpu(iclog->ic_header.h_lsn)) < 0) {
+ be64_to_cpu(iclog->ic_header.h_lsn)) < 0) {
iclog = iclog->ic_next;
continue; /* Leave this iclog for
* another thread */
@@ -2240,23 +2201,21 @@ xlog_state_do_callback(
iclog->ic_state = XLOG_STATE_CALLBACK;
- spin_unlock(&log->l_icloglock);
- /* l_last_sync_lsn field protected by
- * l_grant_lock. Don't worry about iclog's lsn.
- * No one else can be here except us.
+ /*
+ * update the last_sync_lsn before we drop the
+ * icloglock to ensure we are the only one that
+ * can update it.
*/
- spin_lock(&log->l_grant_lock);
- ASSERT(XFS_LSN_CMP(log->l_last_sync_lsn,
- be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
- log->l_last_sync_lsn =
- be64_to_cpu(iclog->ic_header.h_lsn);
- spin_unlock(&log->l_grant_lock);
+ ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
+ be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
+ atomic64_set(&log->l_last_sync_lsn,
+ be64_to_cpu(iclog->ic_header.h_lsn));
- } else {
- spin_unlock(&log->l_icloglock);
+ } else
ioerrors++;
- }
+
+ spin_unlock(&log->l_icloglock);
/*
* Keep processing entries in the callback list until
@@ -2297,7 +2256,7 @@ xlog_state_do_callback(
xlog_state_clean_log(log);
/* wake up threads waiting in xfs_log_force() */
- sv_broadcast(&iclog->ic_force_wait);
+ wake_up_all(&iclog->ic_force_wait);
iclog = iclog->ic_next;
} while (first_iclog != iclog);
@@ -2344,7 +2303,7 @@ xlog_state_do_callback(
spin_unlock(&log->l_icloglock);
if (wake)
- sv_broadcast(&log->l_flush_wait);
+ wake_up_all(&log->l_flush_wait);
}
@@ -2395,7 +2354,7 @@ xlog_state_done_syncing(
* iclog buffer, we wake them all, one will get to do the
* I/O, the others get to wait for the result.
*/
- sv_broadcast(&iclog->ic_write_wait);
+ wake_up_all(&iclog->ic_write_wait);
spin_unlock(&log->l_icloglock);
xlog_state_do_callback(log, aborted, iclog); /* also cleans log */
} /* xlog_state_done_syncing */
@@ -2444,7 +2403,7 @@ restart:
XFS_STATS_INC(xs_log_noiclogs);
/* Wait for log writes to have flushed */
- sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0);
+ xlog_wait(&log->l_flush_wait, &log->l_icloglock);
goto restart;
}
@@ -2527,6 +2486,18 @@ restart:
*
* Once a ticket gets put onto the reserveq, it will only return after
* the needed reservation is satisfied.
+ *
+ * This function is structured so that it has a lock free fast path. This is
+ * necessary because every new transaction reservation will come through this
+ * path. Hence any lock will be globally hot if we take it unconditionally on
+ * every pass.
+ *
+ * As tickets are only ever moved on and off the reserveq under the
+ * l_grant_reserve_lock, we only need to take that lock if we are going
+ * to add the ticket to the queue and sleep. We can avoid taking the lock if the
+ * ticket was never added to the reserveq because the t_queue list head will be
+ * empty and we hold the only reference to it so it can safely be checked
+ * unlocked.
*/
STATIC int
xlog_grant_log_space(xlog_t *log,
@@ -2534,24 +2505,27 @@ xlog_grant_log_space(xlog_t *log,
{
int free_bytes;
int need_bytes;
-#ifdef DEBUG
- xfs_lsn_t tail_lsn;
-#endif
-
#ifdef DEBUG
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
panic("grant Recovery problem");
#endif
- /* Is there space or do we need to sleep? */
- spin_lock(&log->l_grant_lock);
-
trace_xfs_log_grant_enter(log, tic);
+ need_bytes = tic->t_unit_res;
+ if (tic->t_flags & XFS_LOG_PERM_RESERV)
+ need_bytes *= tic->t_ocnt;
+
/* something is already sleeping; insert new transaction at end */
- if (log->l_reserve_headq) {
- xlog_ins_ticketq(&log->l_reserve_headq, tic);
+ if (!list_empty_careful(&log->l_reserveq)) {
+ spin_lock(&log->l_grant_reserve_lock);
+ /* recheck the queue now we are locked */
+ if (list_empty(&log->l_reserveq)) {
+ spin_unlock(&log->l_grant_reserve_lock);
+ goto redo;
+ }
+ list_add_tail(&tic->t_queue, &log->l_reserveq);
trace_xfs_log_grant_sleep1(log, tic);
@@ -2563,72 +2537,57 @@ xlog_grant_log_space(xlog_t *log,
goto error_return;
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
+ xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
+
/*
* If we got an error, and the filesystem is shutting down,
* we'll catch it down below. So just continue...
*/
trace_xfs_log_grant_wake1(log, tic);
- spin_lock(&log->l_grant_lock);
}
- if (tic->t_flags & XFS_LOG_PERM_RESERV)
- need_bytes = tic->t_unit_res*tic->t_ocnt;
- else
- need_bytes = tic->t_unit_res;
redo:
if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return;
+ goto error_return_unlocked;
- free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle,
- log->l_grant_reserve_bytes);
+ free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
if (free_bytes < need_bytes) {
- if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
- xlog_ins_ticketq(&log->l_reserve_headq, tic);
+ spin_lock(&log->l_grant_reserve_lock);
+ if (list_empty(&tic->t_queue))
+ list_add_tail(&tic->t_queue, &log->l_reserveq);
trace_xfs_log_grant_sleep2(log, tic);
- spin_unlock(&log->l_grant_lock);
- xlog_grant_push_ail(log->l_mp, need_bytes);
- spin_lock(&log->l_grant_lock);
-
- XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
-
- spin_lock(&log->l_grant_lock);
if (XLOG_FORCED_SHUTDOWN(log))
goto error_return;
- trace_xfs_log_grant_wake2(log, tic);
+ xlog_grant_push_ail(log, need_bytes);
+
+ XFS_STATS_INC(xs_sleep_logspace);
+ xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
+ trace_xfs_log_grant_wake2(log, tic);
goto redo;
- } else if (tic->t_flags & XLOG_TIC_IN_Q)
- xlog_del_ticketq(&log->l_reserve_headq, tic);
+ }
- /* we've got enough space */
- xlog_grant_add_space(log, need_bytes);
-#ifdef DEBUG
- tail_lsn = log->l_tail_lsn;
- /*
- * Check to make sure the grant write head didn't just over lap the
- * tail. If the cycles are the same, we can't be overlapping.
- * Otherwise, make sure that the cycles differ by exactly one and
- * check the byte count.
- */
- if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
- ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn));
- ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn)));
+ if (!list_empty(&tic->t_queue)) {
+ spin_lock(&log->l_grant_reserve_lock);
+ list_del_init(&tic->t_queue);
+ spin_unlock(&log->l_grant_reserve_lock);
}
-#endif
+
+ /* we've got enough space */
+ xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes);
+ xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
trace_xfs_log_grant_exit(log, tic);
- xlog_verify_grant_head(log, 1);
- spin_unlock(&log->l_grant_lock);
+ xlog_verify_grant_tail(log);
return 0;
- error_return:
- if (tic->t_flags & XLOG_TIC_IN_Q)
- xlog_del_ticketq(&log->l_reserve_headq, tic);
-
+error_return_unlocked:
+ spin_lock(&log->l_grant_reserve_lock);
+error_return:
+ list_del_init(&tic->t_queue);
+ spin_unlock(&log->l_grant_reserve_lock);
trace_xfs_log_grant_error(log, tic);
/*
@@ -2638,7 +2597,6 @@ redo:
*/
tic->t_curr_res = 0;
tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
- spin_unlock(&log->l_grant_lock);
return XFS_ERROR(EIO);
} /* xlog_grant_log_space */
@@ -2646,17 +2604,14 @@ redo:
/*
* Replenish the byte reservation required by moving the grant write head.
*
- *
+ * Similar to xlog_grant_log_space, the function is structured to have a lock
+ * free fast path.
*/
STATIC int
xlog_regrant_write_log_space(xlog_t *log,
xlog_ticket_t *tic)
{
int free_bytes, need_bytes;
- xlog_ticket_t *ntic;
-#ifdef DEBUG
- xfs_lsn_t tail_lsn;
-#endif
tic->t_curr_res = tic->t_unit_res;
xlog_tic_reset_res(tic);
@@ -2669,12 +2624,9 @@ xlog_regrant_write_log_space(xlog_t *log,
panic("regrant Recovery problem");
#endif
- spin_lock(&log->l_grant_lock);
-
trace_xfs_log_regrant_write_enter(log, tic);
-
if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return;
+ goto error_return_unlocked;
/* If there are other waiters on the queue then give them a
* chance at logspace before us. Wake up the first waiters,
@@ -2683,92 +2635,76 @@ xlog_regrant_write_log_space(xlog_t *log,
* this transaction.
*/
need_bytes = tic->t_unit_res;
- if ((ntic = log->l_write_headq)) {
- free_bytes = xlog_space_left(log, log->l_grant_write_cycle,
- log->l_grant_write_bytes);
- do {
+ if (!list_empty_careful(&log->l_writeq)) {
+ struct xlog_ticket *ntic;
+
+ spin_lock(&log->l_grant_write_lock);
+ free_bytes = xlog_space_left(log, &log->l_grant_write_head);
+ list_for_each_entry(ntic, &log->l_writeq, t_queue) {
ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV);
if (free_bytes < ntic->t_unit_res)
break;
free_bytes -= ntic->t_unit_res;
- sv_signal(&ntic->t_wait);
- ntic = ntic->t_next;
- } while (ntic != log->l_write_headq);
-
- if (ntic != log->l_write_headq) {
- if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
- xlog_ins_ticketq(&log->l_write_headq, tic);
+ wake_up(&ntic->t_wait);
+ }
+ if (ntic != list_first_entry(&log->l_writeq,
+ struct xlog_ticket, t_queue)) {
+ if (list_empty(&tic->t_queue))
+ list_add_tail(&tic->t_queue, &log->l_writeq);
trace_xfs_log_regrant_write_sleep1(log, tic);
- spin_unlock(&log->l_grant_lock);
- xlog_grant_push_ail(log->l_mp, need_bytes);
- spin_lock(&log->l_grant_lock);
+ xlog_grant_push_ail(log, need_bytes);
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_wait, PINOD|PLTWAIT,
- &log->l_grant_lock, s);
-
- /* If we're shutting down, this tic is already
- * off the queue */
- spin_lock(&log->l_grant_lock);
- if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return;
-
+ xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
trace_xfs_log_regrant_write_wake1(log, tic);
- }
+ } else
+ spin_unlock(&log->l_grant_write_lock);
}
redo:
if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return;
+ goto error_return_unlocked;
- free_bytes = xlog_space_left(log, log->l_grant_write_cycle,
- log->l_grant_write_bytes);
+ free_bytes = xlog_space_left(log, &log->l_grant_write_head);
if (free_bytes < need_bytes) {
- if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
- xlog_ins_ticketq(&log->l_write_headq, tic);
- spin_unlock(&log->l_grant_lock);
- xlog_grant_push_ail(log->l_mp, need_bytes);
- spin_lock(&log->l_grant_lock);
-
- XFS_STATS_INC(xs_sleep_logspace);
- trace_xfs_log_regrant_write_sleep2(log, tic);
-
- sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
+ spin_lock(&log->l_grant_write_lock);
+ if (list_empty(&tic->t_queue))
+ list_add_tail(&tic->t_queue, &log->l_writeq);
- /* If we're shutting down, this tic is already off the queue */
- spin_lock(&log->l_grant_lock);
if (XLOG_FORCED_SHUTDOWN(log))
goto error_return;
+ xlog_grant_push_ail(log, need_bytes);
+
+ XFS_STATS_INC(xs_sleep_logspace);
+ trace_xfs_log_regrant_write_sleep2(log, tic);
+ xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
+
trace_xfs_log_regrant_write_wake2(log, tic);
goto redo;
- } else if (tic->t_flags & XLOG_TIC_IN_Q)
- xlog_del_ticketq(&log->l_write_headq, tic);
+ }
- /* we've got enough space */
- xlog_grant_add_space_write(log, need_bytes);
-#ifdef DEBUG
- tail_lsn = log->l_tail_lsn;
- if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
- ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn));
- ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn)));
+ if (!list_empty(&tic->t_queue)) {
+ spin_lock(&log->l_grant_write_lock);
+ list_del_init(&tic->t_queue);
+ spin_unlock(&log->l_grant_write_lock);
}
-#endif
+ /* we've got enough space */
+ xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
trace_xfs_log_regrant_write_exit(log, tic);
-
- xlog_verify_grant_head(log, 1);
- spin_unlock(&log->l_grant_lock);
+ xlog_verify_grant_tail(log);
return 0;
+ error_return_unlocked:
+ spin_lock(&log->l_grant_write_lock);
error_return:
- if (tic->t_flags & XLOG_TIC_IN_Q)
- xlog_del_ticketq(&log->l_reserve_headq, tic);
-
+ list_del_init(&tic->t_queue);
+ spin_unlock(&log->l_grant_write_lock);
trace_xfs_log_regrant_write_error(log, tic);
/*
@@ -2778,7 +2714,6 @@ redo:
*/
tic->t_curr_res = 0;
tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
- spin_unlock(&log->l_grant_lock);
return XFS_ERROR(EIO);
} /* xlog_regrant_write_log_space */
@@ -2799,27 +2734,24 @@ xlog_regrant_reserve_log_space(xlog_t *log,
if (ticket->t_cnt > 0)
ticket->t_cnt--;
- spin_lock(&log->l_grant_lock);
- xlog_grant_sub_space(log, ticket->t_curr_res);
+ xlog_grant_sub_space(log, &log->l_grant_reserve_head,
+ ticket->t_curr_res);
+ xlog_grant_sub_space(log, &log->l_grant_write_head,
+ ticket->t_curr_res);
ticket->t_curr_res = ticket->t_unit_res;
xlog_tic_reset_res(ticket);
trace_xfs_log_regrant_reserve_sub(log, ticket);
- xlog_verify_grant_head(log, 1);
-
/* just return if we still have some of the pre-reserved space */
- if (ticket->t_cnt > 0) {
- spin_unlock(&log->l_grant_lock);
+ if (ticket->t_cnt > 0)
return;
- }
- xlog_grant_add_space_reserve(log, ticket->t_unit_res);
+ xlog_grant_add_space(log, &log->l_grant_reserve_head,
+ ticket->t_unit_res);
trace_xfs_log_regrant_reserve_exit(log, ticket);
- xlog_verify_grant_head(log, 0);
- spin_unlock(&log->l_grant_lock);
ticket->t_curr_res = ticket->t_unit_res;
xlog_tic_reset_res(ticket);
} /* xlog_regrant_reserve_log_space */
@@ -2843,28 +2775,29 @@ STATIC void
xlog_ungrant_log_space(xlog_t *log,
xlog_ticket_t *ticket)
{
+ int bytes;
+
if (ticket->t_cnt > 0)
ticket->t_cnt--;
- spin_lock(&log->l_grant_lock);
trace_xfs_log_ungrant_enter(log, ticket);
-
- xlog_grant_sub_space(log, ticket->t_curr_res);
-
trace_xfs_log_ungrant_sub(log, ticket);
- /* If this is a permanent reservation ticket, we may be able to free
+ /*
+ * If this is a permanent reservation ticket, we may be able to free
* up more space based on the remaining count.
*/
+ bytes = ticket->t_curr_res;
if (ticket->t_cnt > 0) {
ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV);
- xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt);
+ bytes += ticket->t_unit_res*ticket->t_cnt;
}
+ xlog_grant_sub_space(log, &log->l_grant_reserve_head, bytes);
+ xlog_grant_sub_space(log, &log->l_grant_write_head, bytes);
+
trace_xfs_log_ungrant_exit(log, ticket);
- xlog_verify_grant_head(log, 1);
- spin_unlock(&log->l_grant_lock);
xfs_log_move_tail(log->l_mp, 1);
} /* xlog_ungrant_log_space */
@@ -2901,11 +2834,11 @@ xlog_state_release_iclog(
if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
/* update tail before writing to iclog */
- xlog_assign_tail_lsn(log->l_mp);
+ xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);
sync++;
iclog->ic_state = XLOG_STATE_SYNCING;
- iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn);
- xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn);
+ iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
+ xlog_verify_tail_lsn(log, iclog, tail_lsn);
/* cycle incremented when incrementing curr_block */
}
spin_unlock(&log->l_icloglock);
@@ -3088,7 +3021,7 @@ maybe_sleep:
return XFS_ERROR(EIO);
}
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s);
+ xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
/*
* No need to grab the log lock here since we're
* only deciding whether or not to return EIO
@@ -3206,8 +3139,8 @@ try_again:
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_prev->ic_write_wait,
- PSWP, &log->l_icloglock, s);
+ xlog_wait(&iclog->ic_prev->ic_write_wait,
+ &log->l_icloglock);
if (log_flushed)
*log_flushed = 1;
already_slept = 1;
@@ -3235,7 +3168,7 @@ try_again:
return XFS_ERROR(EIO);
}
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
+ xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
/*
* No need to grab the log lock here since we're
* only deciding whether or not to return EIO
@@ -3310,10 +3243,8 @@ xfs_log_ticket_put(
xlog_ticket_t *ticket)
{
ASSERT(atomic_read(&ticket->t_ref) > 0);
- if (atomic_dec_and_test(&ticket->t_ref)) {
- sv_destroy(&ticket->t_wait);
+ if (atomic_dec_and_test(&ticket->t_ref))
kmem_zone_free(xfs_log_ticket_zone, ticket);
- }
}
xlog_ticket_t *
@@ -3435,6 +3366,7 @@ xlog_ticket_alloc(
}
atomic_set(&tic->t_ref, 1);
+ INIT_LIST_HEAD(&tic->t_queue);
tic->t_unit_res = unit_bytes;
tic->t_curr_res = unit_bytes;
tic->t_cnt = cnt;
@@ -3445,7 +3377,7 @@ xlog_ticket_alloc(
tic->t_trans_type = 0;
if (xflags & XFS_LOG_PERM_RESERV)
tic->t_flags |= XLOG_TIC_PERM_RESERV;
- sv_init(&tic->t_wait, SV_DEFAULT, "logtick");
+ init_waitqueue_head(&tic->t_wait);
xlog_tic_reset_res(tic);
@@ -3484,18 +3416,25 @@ xlog_verify_dest_ptr(
}
STATIC void
-xlog_verify_grant_head(xlog_t *log, int equals)
+xlog_verify_grant_tail(
+ struct log *log)
{
- if (log->l_grant_reserve_cycle == log->l_grant_write_cycle) {
- if (equals)
- ASSERT(log->l_grant_reserve_bytes >= log->l_grant_write_bytes);
- else
- ASSERT(log->l_grant_reserve_bytes > log->l_grant_write_bytes);
- } else {
- ASSERT(log->l_grant_reserve_cycle-1 == log->l_grant_write_cycle);
- ASSERT(log->l_grant_write_bytes >= log->l_grant_reserve_bytes);
- }
-} /* xlog_verify_grant_head */
+ int tail_cycle, tail_blocks;
+ int cycle, space;
+
+ /*
+ * Check to make sure the grant write head didn't just over lap the
+ * tail. If the cycles are the same, we can't be overlapping.
+ * Otherwise, make sure that the cycles differ by exactly one and
+ * check the byte count.
+ */
+ xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space);
+ xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
+ if (tail_cycle != cycle) {
+ ASSERT(cycle - 1 == tail_cycle);
+ ASSERT(space <= BBTOB(tail_blocks));
+ }
+}
/* check if it will fit */
STATIC void
@@ -3716,12 +3655,10 @@ xfs_log_force_umount(
xlog_cil_force(log);
/*
- * We must hold both the GRANT lock and the LOG lock,
- * before we mark the filesystem SHUTDOWN and wake
- * everybody up to tell the bad news.
+ * mark the filesystem and the as in a shutdown state and wake
+ * everybody up to tell them the bad news.
*/
spin_lock(&log->l_icloglock);
- spin_lock(&log->l_grant_lock);
mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
if (mp->m_sb_bp)
XFS_BUF_DONE(mp->m_sb_bp);
@@ -3742,27 +3679,21 @@ xfs_log_force_umount(
spin_unlock(&log->l_icloglock);
/*
- * We don't want anybody waiting for log reservations
- * after this. That means we have to wake up everybody
- * queued up on reserve_headq as well as write_headq.
- * In addition, we make sure in xlog_{re}grant_log_space
- * that we don't enqueue anything once the SHUTDOWN flag
- * is set, and this action is protected by the GRANTLOCK.
+ * We don't want anybody waiting for log reservations after this. That
+ * means we have to wake up everybody queued up on reserveq as well as
+ * writeq. In addition, we make sure in xlog_{re}grant_log_space that
+ * we don't enqueue anything once the SHUTDOWN flag is set, and this
+ * action is protected by the grant locks.
*/
- if ((tic = log->l_reserve_headq)) {
- do {
- sv_signal(&tic->t_wait);
- tic = tic->t_next;
- } while (tic != log->l_reserve_headq);
- }
-
- if ((tic = log->l_write_headq)) {
- do {
- sv_signal(&tic->t_wait);
- tic = tic->t_next;
- } while (tic != log->l_write_headq);
- }
- spin_unlock(&log->l_grant_lock);
+ spin_lock(&log->l_grant_reserve_lock);
+ list_for_each_entry(tic, &log->l_reserveq, t_queue)
+ wake_up(&tic->t_wait);
+ spin_unlock(&log->l_grant_reserve_lock);
+
+ spin_lock(&log->l_grant_write_lock);
+ list_for_each_entry(tic, &log->l_writeq, t_queue)
+ wake_up(&tic->t_wait);
+ spin_unlock(&log->l_grant_write_lock);
if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
ASSERT(!logerror);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 23d6ceb..9dc8125 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -61,7 +61,7 @@ xlog_cil_init(
INIT_LIST_HEAD(&cil->xc_committing);
spin_lock_init(&cil->xc_cil_lock);
init_rwsem(&cil->xc_ctx_lock);
- sv_init(&cil->xc_commit_wait, SV_DEFAULT, "cilwait");
+ init_waitqueue_head(&cil->xc_commit_wait);
INIT_LIST_HEAD(&ctx->committing);
INIT_LIST_HEAD(&ctx->busy_extents);
@@ -361,15 +361,10 @@ xlog_cil_committed(
int abort)
{
struct xfs_cil_ctx *ctx = args;
- struct xfs_log_vec *lv;
- int abortflag = abort ? XFS_LI_ABORTED : 0;
struct xfs_busy_extent *busyp, *n;
- /* unpin all the log items */
- for (lv = ctx->lv_chain; lv; lv = lv->lv_next ) {
- xfs_trans_item_committed(lv->lv_item, ctx->start_lsn,
- abortflag);
- }
+ xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
+ ctx->start_lsn, abort);
list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list)
xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp);
@@ -568,7 +563,7 @@ restart:
* It is still being pushed! Wait for the push to
* complete, then start again from the beginning.
*/
- sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
+ xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
goto restart;
}
}
@@ -592,7 +587,7 @@ restart:
*/
spin_lock(&cil->xc_cil_lock);
ctx->commit_lsn = commit_lsn;
- sv_broadcast(&cil->xc_commit_wait);
+ wake_up_all(&cil->xc_commit_wait);
spin_unlock(&cil->xc_cil_lock);
/* release the hounds! */
@@ -757,7 +752,7 @@ restart:
* It is still being pushed! Wait for the push to
* complete, then start again from the beginning.
*/
- sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
+ xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
goto restart;
}
if (ctx->sequence != sequence)
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index edcdfe0..d5f8be8 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -21,7 +21,6 @@
struct xfs_buf;
struct log;
struct xlog_ticket;
-struct xfs_buf_cancel;
struct xfs_mount;
/*
@@ -54,7 +53,6 @@ struct xfs_mount;
BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
-
static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block)
{
return ((xfs_lsn_t)cycle << 32) | block;
@@ -133,12 +131,10 @@ static inline uint xlog_get_client_id(__be32 i)
*/
#define XLOG_TIC_INITED 0x1 /* has been initialized */
#define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */
-#define XLOG_TIC_IN_Q 0x4
#define XLOG_TIC_FLAGS \
{ XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \
- { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }, \
- { XLOG_TIC_IN_Q, "XLOG_TIC_IN_Q" }
+ { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }
#endif /* __KERNEL__ */
@@ -244,9 +240,8 @@ typedef struct xlog_res {
} xlog_res_t;
typedef struct xlog_ticket {
- sv_t t_wait; /* ticket wait queue : 20 */
- struct xlog_ticket *t_next; /* :4|8 */
- struct xlog_ticket *t_prev; /* :4|8 */
+ wait_queue_head_t t_wait; /* ticket wait queue */
+ struct list_head t_queue; /* reserve/write queue */
xlog_tid_t t_tid; /* transaction identifier : 4 */
atomic_t t_ref; /* ticket reference count : 4 */
int t_curr_res; /* current reservation in bytes : 4 */
@@ -353,8 +348,8 @@ typedef union xlog_in_core2 {
* and move everything else out to subsequent cachelines.
*/
typedef struct xlog_in_core {
- sv_t ic_force_wait;
- sv_t ic_write_wait;
+ wait_queue_head_t ic_force_wait;
+ wait_queue_head_t ic_write_wait;
struct xlog_in_core *ic_next;
struct xlog_in_core *ic_prev;
struct xfs_buf *ic_bp;
@@ -421,7 +416,7 @@ struct xfs_cil {
struct xfs_cil_ctx *xc_ctx;
struct rw_semaphore xc_ctx_lock;
struct list_head xc_committing;
- sv_t xc_commit_wait;
+ wait_queue_head_t xc_commit_wait;
xfs_lsn_t xc_current_sequence;
};
@@ -491,7 +486,7 @@ typedef struct log {
struct xfs_buftarg *l_targ; /* buftarg of log */
uint l_flags;
uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
- struct xfs_buf_cancel **l_buf_cancel_table;
+ struct list_head *l_buf_cancel_table;
int l_iclog_hsize; /* size of iclog header */
int l_iclog_heads; /* # of iclog header sectors */
uint l_sectBBsize; /* sector size in BBs (2^n) */
@@ -503,29 +498,40 @@ typedef struct log {
int l_logBBsize; /* size of log in BB chunks */
/* The following block of fields are changed while holding icloglock */
- sv_t l_flush_wait ____cacheline_aligned_in_smp;
+ wait_queue_head_t l_flush_wait ____cacheline_aligned_in_smp;
/* waiting for iclog flush */
int l_covered_state;/* state of "covering disk
* log entries" */
xlog_in_core_t *l_iclog; /* head log queue */
spinlock_t l_icloglock; /* grab to change iclog state */
- xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed
- * buffers */
- xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */
int l_curr_cycle; /* Cycle number of log writes */
int l_prev_cycle; /* Cycle number before last
* block increment */
int l_curr_block; /* current logical log block */
int l_prev_block; /* previous logical log block */
- /* The following block of fields are changed while holding grant_lock */
- spinlock_t l_grant_lock ____cacheline_aligned_in_smp;
- xlog_ticket_t *l_reserve_headq;
- xlog_ticket_t *l_write_headq;
- int l_grant_reserve_cycle;
- int l_grant_reserve_bytes;
- int l_grant_write_cycle;
- int l_grant_write_bytes;
+ /*
+ * l_last_sync_lsn and l_tail_lsn are atomics so they can be set and
+ * read without needing to hold specific locks. To avoid operations
+ * contending with other hot objects, place each of them on a separate
+ * cacheline.
+ */
+ /* lsn of last LR on disk */
+ atomic64_t l_last_sync_lsn ____cacheline_aligned_in_smp;
+ /* lsn of 1st LR with unflushed * buffers */
+ atomic64_t l_tail_lsn ____cacheline_aligned_in_smp;
+
+ /*
+ * ticket grant locks, queues and accounting have their own cachlines
+ * as these are quite hot and can be operated on concurrently.
+ */
+ spinlock_t l_grant_reserve_lock ____cacheline_aligned_in_smp;
+ struct list_head l_reserveq;
+ atomic64_t l_grant_reserve_head;
+
+ spinlock_t l_grant_write_lock ____cacheline_aligned_in_smp;
+ struct list_head l_writeq;
+ atomic64_t l_grant_write_head;
/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
@@ -534,6 +540,9 @@ typedef struct log {
} xlog_t;
+#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
+ ((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE))
+
#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR)
/* common routines */
@@ -562,6 +571,61 @@ int xlog_write(struct log *log, struct xfs_log_vec *log_vector,
xlog_in_core_t **commit_iclog, uint flags);
/*
+ * When we crack an atomic LSN, we sample it first so that the value will not
+ * change while we are cracking it into the component values. This means we
+ * will always get consistent component values to work from. This should always
+ * be used to smaple and crack LSNs taht are stored and updated in atomic
+ * variables.
+ */
+static inline void
+xlog_crack_atomic_lsn(atomic64_t *lsn, uint *cycle, uint *block)
+{
+ xfs_lsn_t val = atomic64_read(lsn);
+
+ *cycle = CYCLE_LSN(val);
+ *block = BLOCK_LSN(val);
+}
+
+/*
+ * Calculate and assign a value to an atomic LSN variable from component pieces.
+ */
+static inline void
+xlog_assign_atomic_lsn(atomic64_t *lsn, uint cycle, uint block)
+{
+ atomic64_set(lsn, xlog_assign_lsn(cycle, block));
+}
+
+/*
+ * When we crack the grant head, we sample it first so that the value will not
+ * change while we are cracking it into the component values. This means we
+ * will always get consistent component values to work from.
+ */
+static inline void
+xlog_crack_grant_head_val(int64_t val, int *cycle, int *space)
+{
+ *cycle = val >> 32;
+ *space = val & 0xffffffff;
+}
+
+static inline void
+xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space)
+{
+ xlog_crack_grant_head_val(atomic64_read(head), cycle, space);
+}
+
+static inline int64_t
+xlog_assign_grant_head_val(int cycle, int space)
+{
+ return ((int64_t)cycle << 32) | space;
+}
+
+static inline void
+xlog_assign_grant_head(atomic64_t *head, int cycle, int space)
+{
+ atomic64_set(head, xlog_assign_grant_head_val(cycle, space));
+}
+
+/*
* Committed Item List interfaces
*/
int xlog_cil_init(struct log *log);
@@ -585,6 +649,21 @@ xlog_cil_force(struct log *log)
*/
#define XLOG_UNMOUNT_REC_TYPE (-1U)
+/*
+ * Wrapper function for waiting on a wait queue serialised against wakeups
+ * by a spinlock. This matches the semantics of all the wait queues used in the
+ * log code.
+ */
+static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue_exclusive(wq, &wait);
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock(lock);
+ schedule();
+ remove_wait_queue(wq, &wait);
+}
#endif /* __KERNEL__ */
#endif /* __XFS_LOG_PRIV_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 966d3f9..204d8e5 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -53,6 +53,17 @@ STATIC void xlog_recover_check_summary(xlog_t *);
#endif
/*
+ * This structure is used during recovery to record the buf log items which
+ * have been canceled and should not be replayed.
+ */
+struct xfs_buf_cancel {
+ xfs_daddr_t bc_blkno;
+ uint bc_len;
+ int bc_refcount;
+ struct list_head bc_list;
+};
+
+/*
* Sector aligned buffer routines for buffer create/read/write/access
*/
@@ -925,12 +936,12 @@ xlog_find_tail(
log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
if (found == 2)
log->l_curr_cycle++;
- log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn);
- log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn);
- log->l_grant_reserve_cycle = log->l_curr_cycle;
- log->l_grant_reserve_bytes = BBTOB(log->l_curr_block);
- log->l_grant_write_cycle = log->l_curr_cycle;
- log->l_grant_write_bytes = BBTOB(log->l_curr_block);
+ atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
+ atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
+ xlog_assign_grant_head(&log->l_grant_reserve_head, log->l_curr_cycle,
+ BBTOB(log->l_curr_block));
+ xlog_assign_grant_head(&log->l_grant_write_head, log->l_curr_cycle,
+ BBTOB(log->l_curr_block));
/*
* Look for unmount record. If we find it, then we know there
@@ -960,7 +971,7 @@ xlog_find_tail(
}
after_umount_blk = (i + hblks + (int)
BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize;
- tail_lsn = log->l_tail_lsn;
+ tail_lsn = atomic64_read(&log->l_tail_lsn);
if (*head_blk == after_umount_blk &&
be32_to_cpu(rhead->h_num_logops) == 1) {
umount_data_blk = (i + hblks) % log->l_logBBsize;
@@ -975,12 +986,10 @@ xlog_find_tail(
* log records will point recovery to after the
* current unmount record.
*/
- log->l_tail_lsn =
- xlog_assign_lsn(log->l_curr_cycle,
- after_umount_blk);
- log->l_last_sync_lsn =
- xlog_assign_lsn(log->l_curr_cycle,
- after_umount_blk);
+ xlog_assign_atomic_lsn(&log->l_tail_lsn,
+ log->l_curr_cycle, after_umount_blk);
+ xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
+ log->l_curr_cycle, after_umount_blk);
*tail_blk = after_umount_blk;
/*
@@ -1605,82 +1614,45 @@ xlog_recover_reorder_trans(
* record in the table to tell us how many times we expect to see this
* record during the second pass.
*/
-STATIC void
-xlog_recover_do_buffer_pass1(
- xlog_t *log,
- xfs_buf_log_format_t *buf_f)
+STATIC int
+xlog_recover_buffer_pass1(
+ struct log *log,
+ xlog_recover_item_t *item)
{
- xfs_buf_cancel_t *bcp;
- xfs_buf_cancel_t *nextp;
- xfs_buf_cancel_t *prevp;
- xfs_buf_cancel_t **bucket;
- xfs_daddr_t blkno = 0;
- uint len = 0;
- ushort flags = 0;
-
- switch (buf_f->blf_type) {
- case XFS_LI_BUF:
- blkno = buf_f->blf_blkno;
- len = buf_f->blf_len;
- flags = buf_f->blf_flags;
- break;
- }
+ xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
+ struct list_head *bucket;
+ struct xfs_buf_cancel *bcp;
/*
* If this isn't a cancel buffer item, then just return.
*/
- if (!(flags & XFS_BLF_CANCEL)) {
+ if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) {
trace_xfs_log_recover_buf_not_cancel(log, buf_f);
- return;
- }
-
- /*
- * Insert an xfs_buf_cancel record into the hash table of
- * them. If there is already an identical record, bump
- * its reference count.
- */
- bucket = &log->l_buf_cancel_table[(__uint64_t)blkno %
- XLOG_BC_TABLE_SIZE];
- /*
- * If the hash bucket is empty then just insert a new record into
- * the bucket.
- */
- if (*bucket == NULL) {
- bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t),
- KM_SLEEP);
- bcp->bc_blkno = blkno;
- bcp->bc_len = len;
- bcp->bc_refcount = 1;
- bcp->bc_next = NULL;
- *bucket = bcp;
- return;
+ return 0;
}
/*
- * The hash bucket is not empty, so search for duplicates of our
- * record. If we find one them just bump its refcount. If not
- * then add us at the end of the list.
+ * Insert an xfs_buf_cancel record into the hash table of them.
+ * If there is already an identical record, bump its reference count.
*/
- prevp = NULL;
- nextp = *bucket;
- while (nextp != NULL) {
- if (nextp->bc_blkno == blkno && nextp->bc_len == len) {
- nextp->bc_refcount++;
+ bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno);
+ list_for_each_entry(bcp, bucket, bc_list) {
+ if (bcp->bc_blkno == buf_f->blf_blkno &&
+ bcp->bc_len == buf_f->blf_len) {
+ bcp->bc_refcount++;
trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f);
- return;
+ return 0;
}
- prevp = nextp;
- nextp = nextp->bc_next;
- }
- ASSERT(prevp != NULL);
- bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t),
- KM_SLEEP);
- bcp->bc_blkno = blkno;
- bcp->bc_len = len;
+ }
+
+ bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP);
+ bcp->bc_blkno = buf_f->blf_blkno;
+ bcp->bc_len = buf_f->blf_len;
bcp->bc_refcount = 1;
- bcp->bc_next = NULL;
- prevp->bc_next = bcp;
+ list_add_tail(&bcp->bc_list, bucket);
+
trace_xfs_log_recover_buf_cancel_add(log, buf_f);
+ return 0;
}
/*
@@ -1698,14 +1670,13 @@ xlog_recover_do_buffer_pass1(
*/
STATIC int
xlog_check_buffer_cancelled(
- xlog_t *log,
+ struct log *log,
xfs_daddr_t blkno,
uint len,
ushort flags)
{
- xfs_buf_cancel_t *bcp;
- xfs_buf_cancel_t *prevp;
- xfs_buf_cancel_t **bucket;
+ struct list_head *bucket;
+ struct xfs_buf_cancel *bcp;
if (log->l_buf_cancel_table == NULL) {
/*
@@ -1716,128 +1687,70 @@ xlog_check_buffer_cancelled(
return 0;
}
- bucket = &log->l_buf_cancel_table[(__uint64_t)blkno %
- XLOG_BC_TABLE_SIZE];
- bcp = *bucket;
- if (bcp == NULL) {
- /*
- * There is no corresponding entry in the table built
- * in pass one, so this buffer has not been cancelled.
- */
- ASSERT(!(flags & XFS_BLF_CANCEL));
- return 0;
- }
-
/*
- * Search for an entry in the buffer cancel table that
- * matches our buffer.
+ * Search for an entry in the cancel table that matches our buffer.
*/
- prevp = NULL;
- while (bcp != NULL) {
- if (bcp->bc_blkno == blkno && bcp->bc_len == len) {
- /*
- * We've go a match, so return 1 so that the
- * recovery of this buffer is cancelled.
- * If this buffer is actually a buffer cancel
- * log item, then decrement the refcount on the
- * one in the table and remove it if this is the
- * last reference.
- */
- if (flags & XFS_BLF_CANCEL) {
- bcp->bc_refcount--;
- if (bcp->bc_refcount == 0) {
- if (prevp == NULL) {
- *bucket = bcp->bc_next;
- } else {
- prevp->bc_next = bcp->bc_next;
- }
- kmem_free(bcp);
- }
- }
- return 1;
- }
- prevp = bcp;
- bcp = bcp->bc_next;
+ bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno);
+ list_for_each_entry(bcp, bucket, bc_list) {
+ if (bcp->bc_blkno == blkno && bcp->bc_len == len)
+ goto found;
}
+
/*
- * We didn't find a corresponding entry in the table, so
- * return 0 so that the buffer is NOT cancelled.
+ * We didn't find a corresponding entry in the table, so return 0 so
+ * that the buffer is NOT cancelled.
*/
ASSERT(!(flags & XFS_BLF_CANCEL));
return 0;
-}
-STATIC int
-xlog_recover_do_buffer_pass2(
- xlog_t *log,
- xfs_buf_log_format_t *buf_f)
-{
- xfs_daddr_t blkno = 0;
- ushort flags = 0;
- uint len = 0;
-
- switch (buf_f->blf_type) {
- case XFS_LI_BUF:
- blkno = buf_f->blf_blkno;
- flags = buf_f->blf_flags;
- len = buf_f->blf_len;
- break;
+found:
+ /*
+ * We've go a match, so return 1 so that the recovery of this buffer
+ * is cancelled. If this buffer is actually a buffer cancel log
+ * item, then decrement the refcount on the one in the table and
+ * remove it if this is the last reference.
+ */
+ if (flags & XFS_BLF_CANCEL) {
+ if (--bcp->bc_refcount == 0) {
+ list_del(&bcp->bc_list);
+ kmem_free(bcp);
+ }
}
-
- return xlog_check_buffer_cancelled(log, blkno, len, flags);
+ return 1;
}
/*
- * Perform recovery for a buffer full of inodes. In these buffers,
- * the only data which should be recovered is that which corresponds
- * to the di_next_unlinked pointers in the on disk inode structures.
- * The rest of the data for the inodes is always logged through the
- * inodes themselves rather than the inode buffer and is recovered
- * in xlog_recover_do_inode_trans().
+ * Perform recovery for a buffer full of inodes. In these buffers, the only
+ * data which should be recovered is that which corresponds to the
+ * di_next_unlinked pointers in the on disk inode structures. The rest of the
+ * data for the inodes is always logged through the inodes themselves rather
+ * than the inode buffer and is recovered in xlog_recover_inode_pass2().
*
- * The only time when buffers full of inodes are fully recovered is
- * when the buffer is full of newly allocated inodes. In this case
- * the buffer will not be marked as an inode buffer and so will be
- * sent to xlog_recover_do_reg_buffer() below during recovery.
+ * The only time when buffers full of inodes are fully recovered is when the
+ * buffer is full of newly allocated inodes. In this case the buffer will
+ * not be marked as an inode buffer and so will be sent to
+ * xlog_recover_do_reg_buffer() below during recovery.
*/
STATIC int
xlog_recover_do_inode_buffer(
- xfs_mount_t *mp,
+ struct xfs_mount *mp,
xlog_recover_item_t *item,
- xfs_buf_t *bp,
+ struct xfs_buf *bp,
xfs_buf_log_format_t *buf_f)
{
int i;
- int item_index;
- int bit;
- int nbits;
- int reg_buf_offset;
- int reg_buf_bytes;
+ int item_index = 0;
+ int bit = 0;
+ int nbits = 0;
+ int reg_buf_offset = 0;
+ int reg_buf_bytes = 0;
int next_unlinked_offset;
int inodes_per_buf;
xfs_agino_t *logged_nextp;
xfs_agino_t *buffer_nextp;
- unsigned int *data_map = NULL;
- unsigned int map_size = 0;
trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
- switch (buf_f->blf_type) {
- case XFS_LI_BUF:
- data_map = buf_f->blf_data_map;
- map_size = buf_f->blf_map_size;
- break;
- }
- /*
- * Set the variables corresponding to the current region to
- * 0 so that we'll initialize them on the first pass through
- * the loop.
- */
- reg_buf_offset = 0;
- reg_buf_bytes = 0;
- bit = 0;
- nbits = 0;
- item_index = 0;
inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog;
for (i = 0; i < inodes_per_buf; i++) {
next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
@@ -1852,18 +1765,18 @@ xlog_recover_do_inode_buffer(
* the current di_next_unlinked field.
*/
bit += nbits;
- bit = xfs_next_bit(data_map, map_size, bit);
+ bit = xfs_next_bit(buf_f->blf_data_map,
+ buf_f->blf_map_size, bit);
/*
* If there are no more logged regions in the
* buffer, then we're done.
*/
- if (bit == -1) {
+ if (bit == -1)
return 0;
- }
- nbits = xfs_contig_bits(data_map, map_size,
- bit);
+ nbits = xfs_contig_bits(buf_f->blf_data_map,
+ buf_f->blf_map_size, bit);
ASSERT(nbits > 0);
reg_buf_offset = bit << XFS_BLF_SHIFT;
reg_buf_bytes = nbits << XFS_BLF_SHIFT;
@@ -1875,9 +1788,8 @@ xlog_recover_do_inode_buffer(
* di_next_unlinked field, then move on to the next
* di_next_unlinked field.
*/
- if (next_unlinked_offset < reg_buf_offset) {
+ if (next_unlinked_offset < reg_buf_offset)
continue;
- }
ASSERT(item->ri_buf[item_index].i_addr != NULL);
ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
@@ -1913,36 +1825,29 @@ xlog_recover_do_inode_buffer(
* given buffer. The bitmap in the buf log format structure indicates
* where to place the logged data.
*/
-/*ARGSUSED*/
STATIC void
xlog_recover_do_reg_buffer(
struct xfs_mount *mp,
xlog_recover_item_t *item,
- xfs_buf_t *bp,
+ struct xfs_buf *bp,
xfs_buf_log_format_t *buf_f)
{
int i;
int bit;
int nbits;
- unsigned int *data_map = NULL;
- unsigned int map_size = 0;
int error;
trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
- switch (buf_f->blf_type) {
- case XFS_LI_BUF:
- data_map = buf_f->blf_data_map;
- map_size = buf_f->blf_map_size;
- break;
- }
bit = 0;
i = 1; /* 0 is the buf format structure */
while (1) {
- bit = xfs_next_bit(data_map, map_size, bit);
+ bit = xfs_next_bit(buf_f->blf_data_map,
+ buf_f->blf_map_size, bit);
if (bit == -1)
break;
- nbits = xfs_contig_bits(data_map, map_size, bit);
+ nbits = xfs_contig_bits(buf_f->blf_data_map,
+ buf_f->blf_map_size, bit);
ASSERT(nbits > 0);
ASSERT(item->ri_buf[i].i_addr != NULL);
ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
@@ -2176,77 +2081,46 @@ xlog_recover_do_dquot_buffer(
* for more details on the implementation of the table of cancel records.
*/
STATIC int
-xlog_recover_do_buffer_trans(
+xlog_recover_buffer_pass2(
xlog_t *log,
- xlog_recover_item_t *item,
- int pass)
+ xlog_recover_item_t *item)
{
xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
- xfs_mount_t *mp;
+ xfs_mount_t *mp = log->l_mp;
xfs_buf_t *bp;
int error;
- int cancel;
- xfs_daddr_t blkno;
- int len;
- ushort flags;
uint buf_flags;
- if (pass == XLOG_RECOVER_PASS1) {
- /*
- * In this pass we're only looking for buf items
- * with the XFS_BLF_CANCEL bit set.
- */
- xlog_recover_do_buffer_pass1(log, buf_f);
+ /*
+ * In this pass we only want to recover all the buffers which have
+ * not been cancelled and are not cancellation buffers themselves.
+ */
+ if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno,
+ buf_f->blf_len, buf_f->blf_flags)) {
+ trace_xfs_log_recover_buf_cancel(log, buf_f);
return 0;
- } else {
- /*
- * In this pass we want to recover all the buffers
- * which have not been cancelled and are not
- * cancellation buffers themselves. The routine
- * we call here will tell us whether or not to
- * continue with the replay of this buffer.
- */
- cancel = xlog_recover_do_buffer_pass2(log, buf_f);
- if (cancel) {
- trace_xfs_log_recover_buf_cancel(log, buf_f);
- return 0;
- }
}
+
trace_xfs_log_recover_buf_recover(log, buf_f);
- switch (buf_f->blf_type) {
- case XFS_LI_BUF:
- blkno = buf_f->blf_blkno;
- len = buf_f->blf_len;
- flags = buf_f->blf_flags;
- break;
- default:
- xfs_fs_cmn_err(CE_ALERT, log->l_mp,
- "xfs_log_recover: unknown buffer type 0x%x, logdev %s",
- buf_f->blf_type, log->l_mp->m_logname ?
- log->l_mp->m_logname : "internal");
- XFS_ERROR_REPORT("xlog_recover_do_buffer_trans",
- XFS_ERRLEVEL_LOW, log->l_mp);
- return XFS_ERROR(EFSCORRUPTED);
- }
- mp = log->l_mp;
buf_flags = XBF_LOCK;
- if (!(flags & XFS_BLF_INODE_BUF))
+ if (!(buf_f->blf_flags & XFS_BLF_INODE_BUF))
buf_flags |= XBF_MAPPED;
- bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags);
+ bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
+ buf_flags);
if (XFS_BUF_ISERROR(bp)) {
- xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp,
- bp, blkno);
+ xfs_ioerror_alert("xlog_recover_do..(read#1)", mp,
+ bp, buf_f->blf_blkno);
error = XFS_BUF_GETERROR(bp);
xfs_buf_relse(bp);
return error;
}
error = 0;
- if (flags & XFS_BLF_INODE_BUF) {
+ if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
- } else if (flags &
+ } else if (buf_f->blf_flags &
(XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
} else {
@@ -2286,16 +2160,14 @@ xlog_recover_do_buffer_trans(
}
STATIC int
-xlog_recover_do_inode_trans(
+xlog_recover_inode_pass2(
xlog_t *log,
- xlog_recover_item_t *item,
- int pass)
+ xlog_recover_item_t *item)
{
xfs_inode_log_format_t *in_f;
- xfs_mount_t *mp;
+ xfs_mount_t *mp = log->l_mp;
xfs_buf_t *bp;
xfs_dinode_t *dip;
- xfs_ino_t ino;
int len;
xfs_caddr_t src;
xfs_caddr_t dest;
@@ -2305,10 +2177,6 @@ xlog_recover_do_inode_trans(
xfs_icdinode_t *dicp;
int need_free = 0;
- if (pass == XLOG_RECOVER_PASS1) {
- return 0;
- }
-
if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
in_f = item->ri_buf[0].i_addr;
} else {
@@ -2318,8 +2186,6 @@ xlog_recover_do_inode_trans(
if (error)
goto error;
}
- ino = in_f->ilf_ino;
- mp = log->l_mp;
/*
* Inode buffers can be freed, look out for it,
@@ -2354,8 +2220,8 @@ xlog_recover_do_inode_trans(
xfs_buf_relse(bp);
xfs_fs_cmn_err(CE_ALERT, mp,
"xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld",
- dip, bp, ino);
- XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)",
+ dip, bp, in_f->ilf_ino);
+ XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
XFS_ERRLEVEL_LOW, mp);
error = EFSCORRUPTED;
goto error;
@@ -2365,8 +2231,8 @@ xlog_recover_do_inode_trans(
xfs_buf_relse(bp);
xfs_fs_cmn_err(CE_ALERT, mp,
"xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld",
- item, ino);
- XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)",
+ item, in_f->ilf_ino);
+ XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
XFS_ERRLEVEL_LOW, mp);
error = EFSCORRUPTED;
goto error;
@@ -2394,12 +2260,12 @@ xlog_recover_do_inode_trans(
if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) {
if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
(dicp->di_format != XFS_DINODE_FMT_BTREE)) {
- XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(3)",
+ XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
XFS_ERRLEVEL_LOW, mp, dicp);
xfs_buf_relse(bp);
xfs_fs_cmn_err(CE_ALERT, mp,
"xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
- item, dip, bp, ino);
+ item, dip, bp, in_f->ilf_ino);
error = EFSCORRUPTED;
goto error;
}
@@ -2407,40 +2273,40 @@ xlog_recover_do_inode_trans(
if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
(dicp->di_format != XFS_DINODE_FMT_BTREE) &&
(dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
- XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(4)",
+ XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
XFS_ERRLEVEL_LOW, mp, dicp);
xfs_buf_relse(bp);
xfs_fs_cmn_err(CE_ALERT, mp,
"xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
- item, dip, bp, ino);
+ item, dip, bp, in_f->ilf_ino);
error = EFSCORRUPTED;
goto error;
}
}
if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){
- XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(5)",
+ XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
XFS_ERRLEVEL_LOW, mp, dicp);
xfs_buf_relse(bp);
xfs_fs_cmn_err(CE_ALERT, mp,
"xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
- item, dip, bp, ino,
+ item, dip, bp, in_f->ilf_ino,
dicp->di_nextents + dicp->di_anextents,
dicp->di_nblocks);
error = EFSCORRUPTED;
goto error;
}
if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
- XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)",
+ XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
XFS_ERRLEVEL_LOW, mp, dicp);
xfs_buf_relse(bp);
xfs_fs_cmn_err(CE_ALERT, mp,
"xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x",
- item, dip, bp, ino, dicp->di_forkoff);
+ item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
error = EFSCORRUPTED;
goto error;
}
if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) {
- XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)",
+ XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
XFS_ERRLEVEL_LOW, mp, dicp);
xfs_buf_relse(bp);
xfs_fs_cmn_err(CE_ALERT, mp,
@@ -2532,7 +2398,7 @@ xlog_recover_do_inode_trans(
break;
default:
- xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag");
+ xlog_warn("XFS: xlog_recover_inode_pass2: Invalid flag");
ASSERT(0);
xfs_buf_relse(bp);
error = EIO;
@@ -2556,18 +2422,11 @@ error:
* of that type.
*/
STATIC int
-xlog_recover_do_quotaoff_trans(
+xlog_recover_quotaoff_pass1(
xlog_t *log,
- xlog_recover_item_t *item,
- int pass)
+ xlog_recover_item_t *item)
{
- xfs_qoff_logformat_t *qoff_f;
-
- if (pass == XLOG_RECOVER_PASS2) {
- return (0);
- }
-
- qoff_f = item->ri_buf[0].i_addr;
+ xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr;
ASSERT(qoff_f);
/*
@@ -2588,22 +2447,17 @@ xlog_recover_do_quotaoff_trans(
* Recover a dquot record
*/
STATIC int
-xlog_recover_do_dquot_trans(
+xlog_recover_dquot_pass2(
xlog_t *log,
- xlog_recover_item_t *item,
- int pass)
+ xlog_recover_item_t *item)
{
- xfs_mount_t *mp;
+ xfs_mount_t *mp = log->l_mp;
xfs_buf_t *bp;
struct xfs_disk_dquot *ddq, *recddq;
int error;
xfs_dq_logformat_t *dq_f;
uint type;
- if (pass == XLOG_RECOVER_PASS1) {
- return 0;
- }
- mp = log->l_mp;
/*
* Filesystems are required to send in quota flags at mount time.
@@ -2647,7 +2501,7 @@ xlog_recover_do_dquot_trans(
if ((error = xfs_qm_dqcheck(recddq,
dq_f->qlf_id,
0, XFS_QMOPT_DOWARN,
- "xlog_recover_do_dquot_trans (log copy)"))) {
+ "xlog_recover_dquot_pass2 (log copy)"))) {
return XFS_ERROR(EIO);
}
ASSERT(dq_f->qlf_len == 1);
@@ -2670,7 +2524,7 @@ xlog_recover_do_dquot_trans(
* minimal initialization then.
*/
if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
- "xlog_recover_do_dquot_trans")) {
+ "xlog_recover_dquot_pass2")) {
xfs_buf_relse(bp);
return XFS_ERROR(EIO);
}
@@ -2693,38 +2547,31 @@ xlog_recover_do_dquot_trans(
* LSN.
*/
STATIC int
-xlog_recover_do_efi_trans(
+xlog_recover_efi_pass2(
xlog_t *log,
xlog_recover_item_t *item,
- xfs_lsn_t lsn,
- int pass)
+ xfs_lsn_t lsn)
{
int error;
- xfs_mount_t *mp;
+ xfs_mount_t *mp = log->l_mp;
xfs_efi_log_item_t *efip;
xfs_efi_log_format_t *efi_formatp;
- if (pass == XLOG_RECOVER_PASS1) {
- return 0;
- }
-
efi_formatp = item->ri_buf[0].i_addr;
- mp = log->l_mp;
efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
if ((error = xfs_efi_copy_format(&(item->ri_buf[0]),
&(efip->efi_format)))) {
xfs_efi_item_free(efip);
return error;
}
- efip->efi_next_extent = efi_formatp->efi_nextents;
- efip->efi_flags |= XFS_EFI_COMMITTED;
+ atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
spin_lock(&log->l_ailp->xa_lock);
/*
* xfs_trans_ail_update() drops the AIL lock.
*/
- xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn);
+ xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn);
return 0;
}
@@ -2737,11 +2584,10 @@ xlog_recover_do_efi_trans(
* efd format structure. If we find it, we remove the efi from the
* AIL and free it.
*/
-STATIC void
-xlog_recover_do_efd_trans(
+STATIC int
+xlog_recover_efd_pass2(
xlog_t *log,
- xlog_recover_item_t *item,
- int pass)
+ xlog_recover_item_t *item)
{
xfs_efd_log_format_t *efd_formatp;
xfs_efi_log_item_t *efip = NULL;
@@ -2750,10 +2596,6 @@ xlog_recover_do_efd_trans(
struct xfs_ail_cursor cur;
struct xfs_ail *ailp = log->l_ailp;
- if (pass == XLOG_RECOVER_PASS1) {
- return;
- }
-
efd_formatp = item->ri_buf[0].i_addr;
ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
@@ -2785,62 +2627,6 @@ xlog_recover_do_efd_trans(
}
xfs_trans_ail_cursor_done(ailp, &cur);
spin_unlock(&ailp->xa_lock);
-}
-
-/*
- * Perform the transaction
- *
- * If the transaction modifies a buffer or inode, do it now. Otherwise,
- * EFIs and EFDs get queued up by adding entries into the AIL for them.
- */
-STATIC int
-xlog_recover_do_trans(
- xlog_t *log,
- xlog_recover_t *trans,
- int pass)
-{
- int error = 0;
- xlog_recover_item_t *item;
-
- error = xlog_recover_reorder_trans(log, trans, pass);
- if (error)
- return error;
-
- list_for_each_entry(item, &trans->r_itemq, ri_list) {
- trace_xfs_log_recover_item_recover(log, trans, item, pass);
- switch (ITEM_TYPE(item)) {
- case XFS_LI_BUF:
- error = xlog_recover_do_buffer_trans(log, item, pass);
- break;
- case XFS_LI_INODE:
- error = xlog_recover_do_inode_trans(log, item, pass);
- break;
- case XFS_LI_EFI:
- error = xlog_recover_do_efi_trans(log, item,
- trans->r_lsn, pass);
- break;
- case XFS_LI_EFD:
- xlog_recover_do_efd_trans(log, item, pass);
- error = 0;
- break;
- case XFS_LI_DQUOT:
- error = xlog_recover_do_dquot_trans(log, item, pass);
- break;
- case XFS_LI_QUOTAOFF:
- error = xlog_recover_do_quotaoff_trans(log, item,
- pass);
- break;
- default:
- xlog_warn(
- "XFS: invalid item type (%d) xlog_recover_do_trans", ITEM_TYPE(item));
- ASSERT(0);
- error = XFS_ERROR(EIO);
- break;
- }
-
- if (error)
- return error;
- }
return 0;
}
@@ -2852,7 +2638,7 @@ xlog_recover_do_trans(
*/
STATIC void
xlog_recover_free_trans(
- xlog_recover_t *trans)
+ struct xlog_recover *trans)
{
xlog_recover_item_t *item, *n;
int i;
@@ -2871,17 +2657,95 @@ xlog_recover_free_trans(
}
STATIC int
+xlog_recover_commit_pass1(
+ struct log *log,
+ struct xlog_recover *trans,
+ xlog_recover_item_t *item)
+{
+ trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1);
+
+ switch (ITEM_TYPE(item)) {
+ case XFS_LI_BUF:
+ return xlog_recover_buffer_pass1(log, item);
+ case XFS_LI_QUOTAOFF:
+ return xlog_recover_quotaoff_pass1(log, item);
+ case XFS_LI_INODE:
+ case XFS_LI_EFI:
+ case XFS_LI_EFD:
+ case XFS_LI_DQUOT:
+ /* nothing to do in pass 1 */
+ return 0;
+ default:
+ xlog_warn(
+ "XFS: invalid item type (%d) xlog_recover_commit_pass1",
+ ITEM_TYPE(item));
+ ASSERT(0);
+ return XFS_ERROR(EIO);
+ }
+}
+
+STATIC int
+xlog_recover_commit_pass2(
+ struct log *log,
+ struct xlog_recover *trans,
+ xlog_recover_item_t *item)
+{
+ trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
+
+ switch (ITEM_TYPE(item)) {
+ case XFS_LI_BUF:
+ return xlog_recover_buffer_pass2(log, item);
+ case XFS_LI_INODE:
+ return xlog_recover_inode_pass2(log, item);
+ case XFS_LI_EFI:
+ return xlog_recover_efi_pass2(log, item, trans->r_lsn);
+ case XFS_LI_EFD:
+ return xlog_recover_efd_pass2(log, item);
+ case XFS_LI_DQUOT:
+ return xlog_recover_dquot_pass2(log, item);
+ case XFS_LI_QUOTAOFF:
+ /* nothing to do in pass2 */
+ return 0;
+ default:
+ xlog_warn(
+ "XFS: invalid item type (%d) xlog_recover_commit_pass2",
+ ITEM_TYPE(item));
+ ASSERT(0);
+ return XFS_ERROR(EIO);
+ }
+}
+
+/*
+ * Perform the transaction.
+ *
+ * If the transaction modifies a buffer or inode, do it now. Otherwise,
+ * EFIs and EFDs get queued up by adding entries into the AIL for them.
+ */
+STATIC int
xlog_recover_commit_trans(
- xlog_t *log,
- xlog_recover_t *trans,
+ struct log *log,
+ struct xlog_recover *trans,
int pass)
{
- int error;
+ int error = 0;
+ xlog_recover_item_t *item;
hlist_del(&trans->r_list);
- if ((error = xlog_recover_do_trans(log, trans, pass)))
+
+ error = xlog_recover_reorder_trans(log, trans, pass);
+ if (error)
return error;
- xlog_recover_free_trans(trans); /* no error */
+
+ list_for_each_entry(item, &trans->r_itemq, ri_list) {
+ if (pass == XLOG_RECOVER_PASS1)
+ error = xlog_recover_commit_pass1(log, trans, item);
+ else
+ error = xlog_recover_commit_pass2(log, trans, item);
+ if (error)
+ return error;
+ }
+
+ xlog_recover_free_trans(trans);
return 0;
}
@@ -3011,7 +2875,7 @@ xlog_recover_process_efi(
xfs_extent_t *extp;
xfs_fsblock_t startblock_fsb;
- ASSERT(!(efip->efi_flags & XFS_EFI_RECOVERED));
+ ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags));
/*
* First check the validity of the extents described by the
@@ -3050,7 +2914,7 @@ xlog_recover_process_efi(
extp->ext_len);
}
- efip->efi_flags |= XFS_EFI_RECOVERED;
+ set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
error = xfs_trans_commit(tp, 0);
return error;
@@ -3107,7 +2971,7 @@ xlog_recover_process_efis(
* Skip EFIs that we've already processed.
*/
efip = (xfs_efi_log_item_t *)lip;
- if (efip->efi_flags & XFS_EFI_RECOVERED) {
+ if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) {
lip = xfs_trans_ail_cursor_next(ailp, &cur);
continue;
}
@@ -3724,7 +3588,7 @@ xlog_do_log_recovery(
xfs_daddr_t head_blk,
xfs_daddr_t tail_blk)
{
- int error;
+ int error, i;
ASSERT(head_blk != tail_blk);
@@ -3732,10 +3596,12 @@ xlog_do_log_recovery(
* First do a pass to find all of the cancelled buf log items.
* Store them in the buf_cancel_table for use in the second pass.
*/
- log->l_buf_cancel_table =
- (xfs_buf_cancel_t **)kmem_zalloc(XLOG_BC_TABLE_SIZE *
- sizeof(xfs_buf_cancel_t*),
+ log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE *
+ sizeof(struct list_head),
KM_SLEEP);
+ for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
+ INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
+
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
XLOG_RECOVER_PASS1);
if (error != 0) {
@@ -3754,7 +3620,7 @@ xlog_do_log_recovery(
int i;
for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
- ASSERT(log->l_buf_cancel_table[i] == NULL);
+ ASSERT(list_empty(&log->l_buf_cancel_table[i]));
}
#endif /* DEBUG */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b1498ab..d447aef 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -275,6 +275,7 @@ xfs_free_perag(
pag = radix_tree_delete(&mp->m_perag_tree, agno);
spin_unlock(&mp->m_perag_lock);
ASSERT(pag);
+ ASSERT(atomic_read(&pag->pag_ref) == 0);
call_rcu(&pag->rcu_head, __xfs_free_perag);
}
}
@@ -471,7 +472,7 @@ xfs_initialize_perag(
goto out_unwind;
pag->pag_agno = index;
pag->pag_mount = mp;
- rwlock_init(&pag->pag_ici_lock);
+ spin_lock_init(&pag->pag_ici_lock);
mutex_init(&pag->pag_ici_reclaim_lock);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
spin_lock_init(&pag->pag_buf_lock);
@@ -974,6 +975,24 @@ xfs_set_rw_sizes(xfs_mount_t *mp)
}
/*
+ * precalculate the low space thresholds for dynamic speculative preallocation.
+ */
+void
+xfs_set_low_space_thresholds(
+ struct xfs_mount *mp)
+{
+ int i;
+
+ for (i = 0; i < XFS_LOWSP_MAX; i++) {
+ __uint64_t space = mp->m_sb.sb_dblocks;
+
+ do_div(space, 100);
+ mp->m_low_space[i] = space * (i + 1);
+ }
+}
+
+
+/*
* Set whether we're using inode alignment.
*/
STATIC void
@@ -1195,6 +1214,9 @@ xfs_mountfs(
*/
xfs_set_rw_sizes(mp);
+ /* set the low space thresholds for dynamic preallocation */
+ xfs_set_low_space_thresholds(mp);
+
/*
* Set the inode cluster size.
* This may still be overridden by the file system
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 5861b49..a62e897 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -103,6 +103,16 @@ extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t,
xfs_mod_incore_sb(mp, field, delta, rsvd)
#endif
+/* dynamic preallocation free space thresholds, 5% down to 1% */
+enum {
+ XFS_LOWSP_1_PCNT = 0,
+ XFS_LOWSP_2_PCNT,
+ XFS_LOWSP_3_PCNT,
+ XFS_LOWSP_4_PCNT,
+ XFS_LOWSP_5_PCNT,
+ XFS_LOWSP_MAX,
+};
+
typedef struct xfs_mount {
struct super_block *m_super;
xfs_tid_t m_tid; /* next unused tid for fs */
@@ -202,6 +212,8 @@ typedef struct xfs_mount {
__int64_t m_update_flags; /* sb flags we need to update
on the next remount,rw */
struct shrinker m_inode_shrink; /* inode reclaim shrinker */
+ int64_t m_low_space[XFS_LOWSP_MAX];
+ /* low free space thresholds */
} xfs_mount_t;
/*
@@ -379,6 +391,8 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
+extern void xfs_set_low_space_thresholds(struct xfs_mount *);
+
#endif /* __KERNEL__ */
extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 45ce15d..edfa178 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -408,7 +408,7 @@ xfs_mru_cache_flush(
spin_lock(&mru->lock);
if (mru->queued) {
spin_unlock(&mru->lock);
- cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
+ cancel_delayed_work_sync(&mru->work);
spin_lock(&mru->lock);
}
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index e0e64b1..9bb6eda 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -346,8 +346,17 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
#define xfs_trans_mod_dquot_byino(tp, ip, fields, delta)
#define xfs_trans_apply_dquot_deltas(tp)
#define xfs_trans_unreserve_and_mod_dquots(tp)
-#define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags) (0)
-#define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl) (0)
+static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp,
+ struct xfs_inode *ip, long nblks, long ninos, uint flags)
+{
+ return 0;
+}
+static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,
+ struct xfs_mount *mp, struct xfs_dquot *udqp,
+ struct xfs_dquot *gdqp, long nblks, long nions, uint flags)
+{
+ return 0;
+}
#define xfs_qm_vop_create_dqattach(tp, ip, u, g)
#define xfs_qm_vop_rename_dqattach(it) (0)
#define xfs_qm_vop_chown(tp, ip, old, new) (NULL)
@@ -357,11 +366,14 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
#define xfs_qm_dqdetach(ip)
#define xfs_qm_dqrele(d)
#define xfs_qm_statvfs(ip, s)
-#define xfs_qm_sync(mp, fl) (0)
+static inline int xfs_qm_sync(struct xfs_mount *mp, int flags)
+{
+ return 0;
+}
#define xfs_qm_newmount(mp, a, b) (0)
#define xfs_qm_mount_quotas(mp)
#define xfs_qm_unmount(mp)
-#define xfs_qm_unmount_quotas(mp) (0)
+#define xfs_qm_unmount_quotas(mp)
#endif /* CONFIG_XFS_QUOTA */
#define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index d2af0a8..77a5989 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -297,6 +297,7 @@ xfs_rename(
* it and some incremental backup programs won't work without it.
*/
xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
/*
* Adjust the link count on src_dp. This is necessary when
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index f6d956b..f80a067 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1350,7 +1350,7 @@ xfs_trans_fill_vecs(
* they could be immediately flushed and we'd have to race with the flusher
* trying to pull the item from the AIL as we add it.
*/
-void
+static void
xfs_trans_item_committed(
struct xfs_log_item *lip,
xfs_lsn_t commit_lsn,
@@ -1425,6 +1425,83 @@ xfs_trans_committed(
xfs_trans_free(tp);
}
+static inline void
+xfs_log_item_batch_insert(
+ struct xfs_ail *ailp,
+ struct xfs_log_item **log_items,
+ int nr_items,
+ xfs_lsn_t commit_lsn)
+{
+ int i;
+
+ spin_lock(&ailp->xa_lock);
+ /* xfs_trans_ail_update_bulk drops ailp->xa_lock */
+ xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn);
+
+ for (i = 0; i < nr_items; i++)
+ IOP_UNPIN(log_items[i], 0);
+}
+
+/*
+ * Bulk operation version of xfs_trans_committed that takes a log vector of
+ * items to insert into the AIL. This uses bulk AIL insertion techniques to
+ * minimise lock traffic.
+ */
+void
+xfs_trans_committed_bulk(
+ struct xfs_ail *ailp,
+ struct xfs_log_vec *log_vector,
+ xfs_lsn_t commit_lsn,
+ int aborted)
+{
+#define LOG_ITEM_BATCH_SIZE 32
+ struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
+ struct xfs_log_vec *lv;
+ int i = 0;
+
+ /* unpin all the log items */
+ for (lv = log_vector; lv; lv = lv->lv_next ) {
+ struct xfs_log_item *lip = lv->lv_item;
+ xfs_lsn_t item_lsn;
+
+ if (aborted)
+ lip->li_flags |= XFS_LI_ABORTED;
+ item_lsn = IOP_COMMITTED(lip, commit_lsn);
+
+ /* item_lsn of -1 means the item was freed */
+ if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
+ continue;
+
+ if (item_lsn != commit_lsn) {
+
+ /*
+ * Not a bulk update option due to unusual item_lsn.
+ * Push into AIL immediately, rechecking the lsn once
+ * we have the ail lock. Then unpin the item.
+ */
+ spin_lock(&ailp->xa_lock);
+ if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
+ xfs_trans_ail_update(ailp, lip, item_lsn);
+ else
+ spin_unlock(&ailp->xa_lock);
+ IOP_UNPIN(lip, 0);
+ continue;
+ }
+
+ /* Item is a candidate for bulk AIL insert. */
+ log_items[i++] = lv->lv_item;
+ if (i >= LOG_ITEM_BATCH_SIZE) {
+ xfs_log_item_batch_insert(ailp, log_items,
+ LOG_ITEM_BATCH_SIZE, commit_lsn);
+ i = 0;
+ }
+ }
+
+ /* make sure we insert the remainder! */
+ if (i)
+ xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn);
+}
+
/*
* Called from the trans_commit code when we notice that
* the filesystem is in the middle of a forced shutdown.
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 246286b..c2042b7 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -294,8 +294,8 @@ struct xfs_log_item_desc {
#define XFS_ALLOC_BTREE_REF 2
#define XFS_BMAP_BTREE_REF 2
#define XFS_DIR_BTREE_REF 2
+#define XFS_INO_REF 2
#define XFS_ATTR_BTREE_REF 1
-#define XFS_INO_REF 1
#define XFS_DQUOT_REF 1
#ifdef __KERNEL__
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index dc90695..c5bbbc4 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -28,8 +28,8 @@
#include "xfs_trans_priv.h"
#include "xfs_error.h"
-STATIC void xfs_ail_insert(struct xfs_ail *, xfs_log_item_t *);
-STATIC xfs_log_item_t * xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *);
+STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t);
+STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *);
STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *);
STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *);
@@ -449,129 +449,152 @@ xfs_trans_unlocked_item(
xfs_log_move_tail(ailp->xa_mount, 1);
} /* xfs_trans_unlocked_item */
-
/*
- * Update the position of the item in the AIL with the new
- * lsn. If it is not yet in the AIL, add it. Otherwise, move
- * it to its new position by removing it and re-adding it.
+ * xfs_trans_ail_update - bulk AIL insertion operation.
+ *
+ * @xfs_trans_ail_update takes an array of log items that all need to be
+ * positioned at the same LSN in the AIL. If an item is not in the AIL, it will
+ * be added. Otherwise, it will be repositioned by removing it and re-adding
+ * it to the AIL. If we move the first item in the AIL, update the log tail to
+ * match the new minimum LSN in the AIL.
*
- * Wakeup anyone with an lsn less than the item's lsn. If the item
- * we move in the AIL is the minimum one, update the tail lsn in the
- * log manager.
+ * This function takes the AIL lock once to execute the update operations on
+ * all the items in the array, and as such should not be called with the AIL
+ * lock held. As a result, once we have the AIL lock, we need to check each log
+ * item LSN to confirm it needs to be moved forward in the AIL.
*
- * This function must be called with the AIL lock held. The lock
- * is dropped before returning.
+ * To optimise the insert operation, we delete all the items from the AIL in
+ * the first pass, moving them into a temporary list, then splice the temporary
+ * list into the correct position in the AIL. This avoids needing to do an
+ * insert operation on every item.
+ *
+ * This function must be called with the AIL lock held. The lock is dropped
+ * before returning.
*/
void
-xfs_trans_ail_update(
- struct xfs_ail *ailp,
- xfs_log_item_t *lip,
- xfs_lsn_t lsn) __releases(ailp->xa_lock)
+xfs_trans_ail_update_bulk(
+ struct xfs_ail *ailp,
+ struct xfs_log_item **log_items,
+ int nr_items,
+ xfs_lsn_t lsn) __releases(ailp->xa_lock)
{
- xfs_log_item_t *dlip = NULL;
- xfs_log_item_t *mlip; /* ptr to minimum lip */
+ xfs_log_item_t *mlip;
xfs_lsn_t tail_lsn;
+ int mlip_changed = 0;
+ int i;
+ LIST_HEAD(tmp);
mlip = xfs_ail_min(ailp);
- if (lip->li_flags & XFS_LI_IN_AIL) {
- dlip = xfs_ail_delete(ailp, lip);
- ASSERT(dlip == lip);
- xfs_trans_ail_cursor_clear(ailp, dlip);
- } else {
- lip->li_flags |= XFS_LI_IN_AIL;
+ for (i = 0; i < nr_items; i++) {
+ struct xfs_log_item *lip = log_items[i];
+ if (lip->li_flags & XFS_LI_IN_AIL) {
+ /* check if we really need to move the item */
+ if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0)
+ continue;
+
+ xfs_ail_delete(ailp, lip);
+ if (mlip == lip)
+ mlip_changed = 1;
+ } else {
+ lip->li_flags |= XFS_LI_IN_AIL;
+ }
+ lip->li_lsn = lsn;
+ list_add(&lip->li_ail, &tmp);
}
- lip->li_lsn = lsn;
- xfs_ail_insert(ailp, lip);
+ xfs_ail_splice(ailp, &tmp, lsn);
- if (mlip == dlip) {
- mlip = xfs_ail_min(ailp);
- /*
- * It is not safe to access mlip after the AIL lock is
- * dropped, so we must get a copy of li_lsn before we do
- * so. This is especially important on 32-bit platforms
- * where accessing and updating 64-bit values like li_lsn
- * is not atomic.
- */
- tail_lsn = mlip->li_lsn;
- spin_unlock(&ailp->xa_lock);
- xfs_log_move_tail(ailp->xa_mount, tail_lsn);
- } else {
+ if (!mlip_changed) {
spin_unlock(&ailp->xa_lock);
+ return;
}
-
-} /* xfs_trans_update_ail */
+ /*
+ * It is not safe to access mlip after the AIL lock is dropped, so we
+ * must get a copy of li_lsn before we do so. This is especially
+ * important on 32-bit platforms where accessing and updating 64-bit
+ * values like li_lsn is not atomic.
+ */
+ mlip = xfs_ail_min(ailp);
+ tail_lsn = mlip->li_lsn;
+ spin_unlock(&ailp->xa_lock);
+ xfs_log_move_tail(ailp->xa_mount, tail_lsn);
+}
/*
- * Delete the given item from the AIL. It must already be in
- * the AIL.
+ * xfs_trans_ail_delete_bulk - remove multiple log items from the AIL
*
- * Wakeup anyone with an lsn less than item's lsn. If the item
- * we delete in the AIL is the minimum one, update the tail lsn in the
- * log manager.
+ * @xfs_trans_ail_delete_bulk takes an array of log items that all need to
+ * removed from the AIL. The caller is already holding the AIL lock, and done
+ * all the checks necessary to ensure the items passed in via @log_items are
+ * ready for deletion. This includes checking that the items are in the AIL.
*
- * Clear the IN_AIL flag from the item, reset its lsn to 0, and
- * bump the AIL's generation count to indicate that the tree
- * has changed.
+ * For each log item to be removed, unlink it from the AIL, clear the IN_AIL
+ * flag from the item and reset the item's lsn to 0. If we remove the first
+ * item in the AIL, update the log tail to match the new minimum LSN in the
+ * AIL.
*
- * This function must be called with the AIL lock held. The lock
- * is dropped before returning.
+ * This function will not drop the AIL lock until all items are removed from
+ * the AIL to minimise the amount of lock traffic on the AIL. This does not
+ * greatly increase the AIL hold time, but does significantly reduce the amount
+ * of traffic on the lock, especially during IO completion.
+ *
+ * This function must be called with the AIL lock held. The lock is dropped
+ * before returning.
*/
void
-xfs_trans_ail_delete(
- struct xfs_ail *ailp,
- xfs_log_item_t *lip) __releases(ailp->xa_lock)
+xfs_trans_ail_delete_bulk(
+ struct xfs_ail *ailp,
+ struct xfs_log_item **log_items,
+ int nr_items) __releases(ailp->xa_lock)
{
- xfs_log_item_t *dlip;
xfs_log_item_t *mlip;
xfs_lsn_t tail_lsn;
+ int mlip_changed = 0;
+ int i;
- if (lip->li_flags & XFS_LI_IN_AIL) {
- mlip = xfs_ail_min(ailp);
- dlip = xfs_ail_delete(ailp, lip);
- ASSERT(dlip == lip);
- xfs_trans_ail_cursor_clear(ailp, dlip);
-
+ mlip = xfs_ail_min(ailp);
- lip->li_flags &= ~XFS_LI_IN_AIL;
- lip->li_lsn = 0;
+ for (i = 0; i < nr_items; i++) {
+ struct xfs_log_item *lip = log_items[i];
+ if (!(lip->li_flags & XFS_LI_IN_AIL)) {
+ struct xfs_mount *mp = ailp->xa_mount;
- if (mlip == dlip) {
- mlip = xfs_ail_min(ailp);
- /*
- * It is not safe to access mlip after the AIL lock
- * is dropped, so we must get a copy of li_lsn
- * before we do so. This is especially important
- * on 32-bit platforms where accessing and updating
- * 64-bit values like li_lsn is not atomic.
- */
- tail_lsn = mlip ? mlip->li_lsn : 0;
- spin_unlock(&ailp->xa_lock);
- xfs_log_move_tail(ailp->xa_mount, tail_lsn);
- } else {
spin_unlock(&ailp->xa_lock);
+ if (!XFS_FORCED_SHUTDOWN(mp)) {
+ xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
+ "%s: attempting to delete a log item that is not in the AIL",
+ __func__);
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ }
+ return;
}
+
+ xfs_ail_delete(ailp, lip);
+ lip->li_flags &= ~XFS_LI_IN_AIL;
+ lip->li_lsn = 0;
+ if (mlip == lip)
+ mlip_changed = 1;
}
- else {
- /*
- * If the file system is not being shutdown, we are in
- * serious trouble if we get to this stage.
- */
- struct xfs_mount *mp = ailp->xa_mount;
+ if (!mlip_changed) {
spin_unlock(&ailp->xa_lock);
- if (!XFS_FORCED_SHUTDOWN(mp)) {
- xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
- "%s: attempting to delete a log item that is not in the AIL",
- __func__);
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- }
+ return;
}
-}
-
+ /*
+ * It is not safe to access mlip after the AIL lock is dropped, so we
+ * must get a copy of li_lsn before we do so. This is especially
+ * important on 32-bit platforms where accessing and updating 64-bit
+ * values like li_lsn is not atomic. It is possible we've emptied the
+ * AIL here, so if that is the case, pass an LSN of 0 to the tail move.
+ */
+ mlip = xfs_ail_min(ailp);
+ tail_lsn = mlip ? mlip->li_lsn : 0;
+ spin_unlock(&ailp->xa_lock);
+ xfs_log_move_tail(ailp->xa_mount, tail_lsn);
+}
/*
* The active item list (AIL) is a doubly linked list of log
@@ -623,16 +646,13 @@ xfs_trans_ail_destroy(
}
/*
- * Insert the given log item into the AIL.
- * We almost always insert at the end of the list, so on inserts
- * we search from the end of the list to find where the
- * new item belongs.
+ * splice the log item list into the AIL at the given LSN.
*/
STATIC void
-xfs_ail_insert(
+xfs_ail_splice(
struct xfs_ail *ailp,
- xfs_log_item_t *lip)
-/* ARGSUSED */
+ struct list_head *list,
+ xfs_lsn_t lsn)
{
xfs_log_item_t *next_lip;
@@ -640,39 +660,33 @@ xfs_ail_insert(
* If the list is empty, just insert the item.
*/
if (list_empty(&ailp->xa_ail)) {
- list_add(&lip->li_ail, &ailp->xa_ail);
+ list_splice(list, &ailp->xa_ail);
return;
}
list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
- if (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)
+ if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
break;
}
ASSERT((&next_lip->li_ail == &ailp->xa_ail) ||
- (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0));
-
- list_add(&lip->li_ail, &next_lip->li_ail);
+ (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0));
- xfs_ail_check(ailp, lip);
+ list_splice_init(list, &next_lip->li_ail);
return;
}
/*
* Delete the given item from the AIL. Return a pointer to the item.
*/
-/*ARGSUSED*/
-STATIC xfs_log_item_t *
+STATIC void
xfs_ail_delete(
struct xfs_ail *ailp,
xfs_log_item_t *lip)
-/* ARGSUSED */
{
xfs_ail_check(ailp, lip);
-
list_del(&lip->li_ail);
-
- return lip;
+ xfs_trans_ail_cursor_clear(ailp, lip);
}
/*
@@ -682,7 +696,6 @@ xfs_ail_delete(
STATIC xfs_log_item_t *
xfs_ail_min(
struct xfs_ail *ailp)
-/* ARGSUSED */
{
if (list_empty(&ailp->xa_ail))
return NULL;
@@ -699,7 +712,6 @@ STATIC xfs_log_item_t *
xfs_ail_next(
struct xfs_ail *ailp,
xfs_log_item_t *lip)
-/* ARGSUSED */
{
if (lip->li_ail.next == &ailp->xa_ail)
return NULL;
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
index f783d5e..f7590f5 100644
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -69,12 +69,16 @@ xfs_trans_log_efi_extent(xfs_trans_t *tp,
tp->t_flags |= XFS_TRANS_DIRTY;
efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY;
- next_extent = efip->efi_next_extent;
+ /*
+ * atomic_inc_return gives us the value after the increment;
+ * we want to use it as an array index so we need to subtract 1 from
+ * it.
+ */
+ next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
ASSERT(next_extent < efip->efi_format.efi_nextents);
extp = &(efip->efi_format.efi_extents[next_extent]);
extp->ext_start = start_block;
extp->ext_len = ext_len;
- efip->efi_next_extent++;
}
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 62da86c..35162c2 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -22,15 +22,17 @@ struct xfs_log_item;
struct xfs_log_item_desc;
struct xfs_mount;
struct xfs_trans;
+struct xfs_ail;
+struct xfs_log_vec;
void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
void xfs_trans_del_item(struct xfs_log_item *);
void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
int flags);
-void xfs_trans_item_committed(struct xfs_log_item *lip,
- xfs_lsn_t commit_lsn, int aborted);
void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
+void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
+ xfs_lsn_t commit_lsn, int aborted);
/*
* AIL traversal cursor.
*
@@ -73,12 +75,29 @@ struct xfs_ail {
/*
* From xfs_trans_ail.c
*/
-void xfs_trans_ail_update(struct xfs_ail *ailp,
- struct xfs_log_item *lip, xfs_lsn_t lsn)
- __releases(ailp->xa_lock);
-void xfs_trans_ail_delete(struct xfs_ail *ailp,
- struct xfs_log_item *lip)
- __releases(ailp->xa_lock);
+void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
+ struct xfs_log_item **log_items, int nr_items,
+ xfs_lsn_t lsn) __releases(ailp->xa_lock);
+static inline void
+xfs_trans_ail_update(
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip,
+ xfs_lsn_t lsn) __releases(ailp->xa_lock)
+{
+ xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn);
+}
+
+void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp,
+ struct xfs_log_item **log_items, int nr_items)
+ __releases(ailp->xa_lock);
+static inline void
+xfs_trans_ail_delete(
+ struct xfs_ail *ailp,
+ xfs_log_item_t *lip) __releases(ailp->xa_lock)
+{
+ xfs_trans_ail_delete_bulk(ailp, &lip, 1);
+}
+
void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t);
void xfs_trans_unlocked_item(struct xfs_ail *,
xfs_log_item_t *);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 8e4a63c4..d8e6f8c 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -964,29 +964,48 @@ xfs_release(
xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE);
}
- if (ip->i_d.di_nlink != 0) {
- if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
- ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
- ip->i_delayed_blks > 0)) &&
- (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
- (!(ip->i_d.di_flags &
- (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
+ if (ip->i_d.di_nlink == 0)
+ return 0;
- /*
- * If we can't get the iolock just skip truncating
- * the blocks past EOF because we could deadlock
- * with the mmap_sem otherwise. We'll get another
- * chance to drop them once the last reference to
- * the inode is dropped, so we'll never leak blocks
- * permanently.
- */
- error = xfs_free_eofblocks(mp, ip,
- XFS_FREE_EOF_TRYLOCK);
- if (error)
- return error;
- }
- }
+ if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
+ ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
+ ip->i_delayed_blks > 0)) &&
+ (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
+ (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
+ /*
+ * If we can't get the iolock just skip truncating the blocks
+ * past EOF because we could deadlock with the mmap_sem
+ * otherwise. We'll get another chance to drop them once the
+ * last reference to the inode is dropped, so we'll never leak
+ * blocks permanently.
+ *
+ * Further, check if the inode is being opened, written and
+ * closed frequently and we have delayed allocation blocks
+ * oustanding (e.g. streaming writes from the NFS server),
+ * truncating the blocks past EOF will cause fragmentation to
+ * occur.
+ *
+ * In this case don't do the truncation, either, but we have to
+ * be careful how we detect this case. Blocks beyond EOF show
+ * up as i_delayed_blks even when the inode is clean, so we
+ * need to truncate them away first before checking for a dirty
+ * release. Hence on the first dirty close we will still remove
+ * the speculative allocation, but after that we will leave it
+ * in place.
+ */
+ if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
+ return 0;
+
+ error = xfs_free_eofblocks(mp, ip,
+ XFS_FREE_EOF_TRYLOCK);
+ if (error)
+ return error;
+
+ /* delalloc blocks after truncation means it really is dirty */
+ if (ip->i_delayed_blks)
+ xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
+ }
return 0;
}
OpenPOWER on IntegriCloud