summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2007-07-18 10:32:00 -0700
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-07-18 10:32:00 -0700
commitd756d10e246a01515d07f8161181b8a14afba7cc (patch)
treede7336f2b4b596881468bf65cb2f2f88cedcde86 /include
parentcdf4a6482dd4c739f8c1132c5a9356912911fec5 (diff)
parente9f410b1c035b6e63f0b4c3d6cfe4298d6a04492 (diff)
downloadop-kernel-dev-d756d10e246a01515d07f8161181b8a14afba7cc.zip
op-kernel-dev-d756d10e246a01515d07f8161181b8a14afba7cc.tar.gz
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: ext4: extent macros cleanup Fix compilation with EXT_DEBUG, also fix leXX_to_cpu conversions. ext4: remove extra IS_RDONLY() check ext4: Use is_power_of_2() Use zero_user_page() in ext4 where possible ext4: Remove 65000 subdirectory limit ext4: Expand extra_inodes space per the s_{want,min}_extra_isize fields ext4: Add nanosecond timestamps jbd2: Move jbd2-debug file to debugfs jbd2: Fix CONFIG_JBD_DEBUG ifdef to be CONFIG_JBD2_DEBUG ext4: Set the journal JBD2_FEATURE_INCOMPAT_64BIT on large devices ext4: Make extents code sanely handle on-disk corruption ext4: copy i_flags to inode flags on write ext4: Enable extents by default Change on-disk format to support 2^15 uninitialized extents write support for preallocated blocks fallocate support in ext4 sys_fallocate() implementation on i386, x86_64 and powerpc
Diffstat (limited to 'include')
-rw-r--r--include/asm-i386/unistd.h3
-rw-r--r--include/asm-powerpc/systbl.h1
-rw-r--r--include/asm-powerpc/unistd.h3
-rw-r--r--include/asm-x86_64/unistd.h2
-rw-r--r--include/linux/ext4_fs.h104
-rw-r--r--include/linux/ext4_fs_extents.h43
-rw-r--r--include/linux/ext4_fs_i.h5
-rw-r--r--include/linux/ext4_fs_sb.h3
-rw-r--r--include/linux/falloc.h6
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/jbd2.h6
-rw-r--r--include/linux/syscalls.h1
12 files changed, 168 insertions, 11 deletions
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index e84ace1..9b15545 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -329,10 +329,11 @@
#define __NR_signalfd 321
#define __NR_timerfd 322
#define __NR_eventfd 323
+#define __NR_fallocate 324
#ifdef __KERNEL__
-#define NR_syscalls 324
+#define NR_syscalls 325
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/include/asm-powerpc/systbl.h b/include/asm-powerpc/systbl.h
index 1cc3f9c..cc6d872 100644
--- a/include/asm-powerpc/systbl.h
+++ b/include/asm-powerpc/systbl.h
@@ -308,6 +308,7 @@ COMPAT_SYS_SPU(move_pages)
SYSCALL_SPU(getcpu)
COMPAT_SYS(epoll_pwait)
COMPAT_SYS_SPU(utimensat)
+COMPAT_SYS(fallocate)
COMPAT_SYS_SPU(signalfd)
COMPAT_SYS_SPU(timerfd)
SYSCALL_SPU(eventfd)
diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h
index f71c606..97d82b6 100644
--- a/include/asm-powerpc/unistd.h
+++ b/include/asm-powerpc/unistd.h
@@ -331,10 +331,11 @@
#define __NR_timerfd 306
#define __NR_eventfd 307
#define __NR_sync_file_range2 308
+#define __NR_fallocate 309
#ifdef __KERNEL__
-#define __NR_syscalls 309
+#define __NR_syscalls 310
#define __NR__exit __NR_exit
#define NR_syscalls __NR_syscalls
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index 8696f8a..fc4e73f 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -630,6 +630,8 @@ __SYSCALL(__NR_signalfd, sys_signalfd)
__SYSCALL(__NR_timerfd, sys_timerfd)
#define __NR_eventfd 284
__SYSCALL(__NR_eventfd, sys_eventfd)
+#define __NR_fallocate 285
+__SYSCALL(__NR_fallocate, sys_fallocate)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index de1f9f7..cdee7aa 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -71,7 +71,7 @@
/*
* Maximal count of links to a file
*/
-#define EXT4_LINK_MAX 32000
+#define EXT4_LINK_MAX 65000
/*
* Macro-instructions used to manage several block sizes
@@ -102,6 +102,7 @@
EXT4_GOOD_OLD_FIRST_INO : \
(s)->s_first_ino)
#endif
+#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
/*
* Macro-instructions used to manage fragments
@@ -201,6 +202,7 @@ struct ext4_group_desc
#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */
#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
+#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
/* Used to pass group descriptor data when online resize is done */
struct ext4_new_group_input {
@@ -225,6 +227,11 @@ struct ext4_new_group_data {
__u32 free_blocks_count;
};
+/*
+ * Following is used by preallocation code to tell get_blocks() that we
+ * want uninitialzed extents.
+ */
+#define EXT4_CREATE_UNINITIALIZED_EXT 2
/*
* ioctl commands
@@ -237,7 +244,7 @@ struct ext4_new_group_data {
#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input)
#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
-#ifdef CONFIG_JBD_DEBUG
+#ifdef CONFIG_JBD2_DEBUG
#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
#endif
#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
@@ -253,7 +260,7 @@ struct ext4_new_group_data {
#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int)
#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int)
#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
-#ifdef CONFIG_JBD_DEBUG
+#ifdef CONFIG_JBD2_DEBUG
#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
#endif
#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
@@ -282,7 +289,7 @@ struct ext4_inode {
__le16 i_uid; /* Low 16 bits of Owner Uid */
__le32 i_size; /* Size in bytes */
__le32 i_atime; /* Access time */
- __le32 i_ctime; /* Creation time */
+ __le32 i_ctime; /* Inode Change time */
__le32 i_mtime; /* Modification time */
__le32 i_dtime; /* Deletion Time */
__le16 i_gid; /* Low 16 bits of Group Id */
@@ -331,10 +338,85 @@ struct ext4_inode {
} osd2; /* OS dependent 2 */
__le16 i_extra_isize;
__le16 i_pad1;
+ __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
+ __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
+ __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
+ __le32 i_crtime; /* File Creation time */
+ __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
};
#define i_size_high i_dir_acl
+#define EXT4_EPOCH_BITS 2
+#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
+#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS)
+
+/*
+ * Extended fields will fit into an inode if the filesystem was formatted
+ * with large inodes (-I 256 or larger) and there are not currently any EAs
+ * consuming all of the available space. For new inodes we always reserve
+ * enough space for the kernel's known extended fields, but for inodes
+ * created with an old kernel this might not have been the case. None of
+ * the extended inode fields is critical for correct filesystem operation.
+ * This macro checks if a certain field fits in the inode. Note that
+ * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
+ */
+#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \
+ ((offsetof(typeof(*ext4_inode), field) + \
+ sizeof((ext4_inode)->field)) \
+ <= (EXT4_GOOD_OLD_INODE_SIZE + \
+ (einode)->i_extra_isize)) \
+
+static inline __le32 ext4_encode_extra_time(struct timespec *time)
+{
+ return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
+ time->tv_sec >> 32 : 0) |
+ ((time->tv_nsec << 2) & EXT4_NSEC_MASK));
+}
+
+static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
+{
+ if (sizeof(time->tv_sec) > 4)
+ time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
+ << 32;
+ time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
+}
+
+#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
+do { \
+ (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
+ if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
+ (raw_inode)->xtime ## _extra = \
+ ext4_encode_extra_time(&(inode)->xtime); \
+} while (0)
+
+#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
+do { \
+ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
+ (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
+ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
+ (raw_inode)->xtime ## _extra = \
+ ext4_encode_extra_time(&(einode)->xtime); \
+} while (0)
+
+#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
+do { \
+ (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
+ if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
+ ext4_decode_extra_time(&(inode)->xtime, \
+ raw_inode->xtime ## _extra); \
+} while (0)
+
+#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
+do { \
+ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
+ (einode)->xtime.tv_sec = \
+ (signed)le32_to_cpu((raw_inode)->xtime); \
+ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
+ ext4_decode_extra_time(&(einode)->xtime, \
+ raw_inode->xtime ## _extra); \
+} while (0)
+
#if defined(__KERNEL__) || defined(__linux__)
#define i_reserved1 osd1.linux1.l_i_reserved1
#define i_frag osd2.linux2.l_i_frag
@@ -533,6 +615,13 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
return container_of(inode, struct ext4_inode_info, vfs_inode);
}
+static inline struct timespec ext4_current_time(struct inode *inode)
+{
+ return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
+ current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
+}
+
+
static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
{
return ino == EXT4_ROOT_INO ||
@@ -603,6 +692,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
+#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
+#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
@@ -620,6 +711,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
EXT4_FEATURE_INCOMPAT_64BIT)
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+ EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
+ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
/*
@@ -862,6 +955,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
extern void ext4_truncate (struct inode *);
extern void ext4_set_inode_flags(struct inode *);
+extern void ext4_get_inode_flags(struct ext4_inode_info *);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
@@ -983,6 +1077,8 @@ extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
extern void ext4_ext_truncate(struct inode *, struct page *);
extern void ext4_ext_init(struct super_block *);
extern void ext4_ext_release(struct super_block *);
+extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
+ loff_t len);
static inline int
ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
unsigned long max_blocks, struct buffer_head *bh,
diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h
index acfe5974..81406f3 100644
--- a/include/linux/ext4_fs_extents.h
+++ b/include/linux/ext4_fs_extents.h
@@ -141,7 +141,25 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
#define EXT_MAX_BLOCK 0xffffffff
-#define EXT_MAX_LEN ((1UL << 15) - 1)
+/*
+ * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
+ * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
+ * MSB of ee_len field in the extent datastructure to signify if this
+ * particular extent is an initialized extent or an uninitialized (i.e.
+ * preallocated).
+ * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
+ * uninitialized extent.
+ * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
+ * uninitialized one. In other words, if MSB of ee_len is set, it is an
+ * uninitialized extent with only one special scenario when ee_len = 0x8000.
+ * In this case we can not have an uninitialized extent of zero length and
+ * thus we make it as a special case of initialized extent with 0x8000 length.
+ * This way we get better extent-to-group alignment for initialized extents.
+ * Hence, the maximum number of blocks we can have in an *initialized*
+ * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
+ */
+#define EXT_INIT_MAX_LEN (1UL << 15)
+#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1)
#define EXT_FIRST_EXTENT(__hdr__) \
@@ -188,8 +206,31 @@ ext4_ext_invalidate_cache(struct inode *inode)
EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO;
}
+static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
+{
+ /* We can not have an uninitialized extent of zero length! */
+ BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0);
+ ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN);
+}
+
+static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
+{
+ /* Extent with ee_len of 0x8000 is treated as an initialized extent */
+ return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN);
+}
+
+static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
+{
+ return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ?
+ le16_to_cpu(ext->ee_len) :
+ (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
+}
+
extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
+extern int ext4_ext_try_to_merge(struct inode *inode,
+ struct ext4_ext_path *path,
+ struct ext4_extent *);
extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *);
diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h
index 9de4944..1a511e99 100644
--- a/include/linux/ext4_fs_i.h
+++ b/include/linux/ext4_fs_i.h
@@ -153,6 +153,11 @@ struct ext4_inode_info {
unsigned long i_ext_generation;
struct ext4_ext_cache i_cached_extent;
+ /*
+ * File creation time. Its function is same as that of
+ * struct timespec i_{a,c,m}time in the generic inode.
+ */
+ struct timespec i_crtime;
};
#endif /* _LINUX_EXT4_FS_I */
diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h
index 2347557..1b2ffee 100644
--- a/include/linux/ext4_fs_sb.h
+++ b/include/linux/ext4_fs_sb.h
@@ -73,7 +73,7 @@ struct ext4_sb_info {
struct list_head s_orphan;
unsigned long s_commit_interval;
struct block_device *journal_bdev;
-#ifdef CONFIG_JBD_DEBUG
+#ifdef CONFIG_JBD2_DEBUG
struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
#endif
@@ -81,6 +81,7 @@ struct ext4_sb_info {
char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
int s_jquota_fmt; /* Format of quota to use */
#endif
+ unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
#ifdef EXTENTS_STATS
/* ext4 extents stats */
diff --git a/include/linux/falloc.h b/include/linux/falloc.h
new file mode 100644
index 0000000..8e912ab
--- /dev/null
+++ b/include/linux/falloc.h
@@ -0,0 +1,6 @@
+#ifndef _FALLOC_H_
+#define _FALLOC_H_
+
+#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
+
+#endif /* _FALLOC_H_ */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 98205f6..0b806c5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1147,6 +1147,8 @@ struct inode_operations {
ssize_t (*listxattr) (struct dentry *, char *, size_t);
int (*removexattr) (struct dentry *, const char *);
void (*truncate_range)(struct inode *, loff_t, loff_t);
+ long (*fallocate)(struct inode *inode, int mode, loff_t offset,
+ loff_t len);
};
struct seq_file;
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 0e0fedd..260d6d7 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -50,14 +50,14 @@
*/
#define JBD_DEFAULT_MAX_COMMIT_AGE 5
-#ifdef CONFIG_JBD_DEBUG
+#ifdef CONFIG_JBD2_DEBUG
/*
* Define JBD_EXPENSIVE_CHECKING to enable more expensive internal
* consistency checks. By default we don't do this unless
- * CONFIG_JBD_DEBUG is on.
+ * CONFIG_JBD2_DEBUG is on.
*/
#define JBD_EXPENSIVE_CHECKING
-extern int jbd2_journal_enable_debug;
+extern u8 jbd2_journal_enable_debug;
#define jbd_debug(n, f, a...) \
do { \
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 83d0ec1..7a8b1e3 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -610,6 +610,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
asmlinkage long sys_timerfd(int ufd, int clockid, int flags,
const struct itimerspec __user *utmr);
asmlinkage long sys_eventfd(unsigned int count);
+asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
OpenPOWER on IntegriCloud