From 5a7bc8eb29b8c759df374d97b6189e03d4ea71c5 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 18 Aug 2008 17:38:46 +0800 Subject: ocfs2: Add the basic xattr disk layout in ocfs2_fs.h Ocfs2 uses a very flexible structure for storing extended attributes on disk. Small amount of attributes are stored directly in the inode block - up to 256 bytes worth. If that fills up, attributes are also stored in an external block, linked to from the inode block. That block can in turn expand to a btree, capable of storing large numbers of attributes. Individual attribute values are stored inline if they're small enough (currently about 80 bytes, this can be changed though), and otherwise are expanded to a btree. The theoretical limit to the size of an individual attribute is about the same as an inode, though the kernel's upper bound on the size of an attributes data is far smaller. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/ocfs2_fs.h | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) (limited to 'fs/ocfs2/ocfs2_fs.h') diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 4f61985..1b46505 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -64,6 +64,7 @@ #define OCFS2_INODE_SIGNATURE "INODE01" #define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01" #define OCFS2_GROUP_DESC_SIGNATURE "GROUP01" +#define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01" /* Compatibility flags */ #define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ @@ -715,6 +716,123 @@ struct ocfs2_group_desc /*40*/ __u8 bg_bitmap[0]; }; +/* + * On disk extended attribute structure for OCFS2. + */ + +/* + * ocfs2_xattr_entry indicates one extend attribute. + * + * Note that it can be stored in inode, one block or one xattr bucket. + */ +struct ocfs2_xattr_entry { + __le32 xe_name_hash; /* hash value of xattr prefix+suffix. */ + __le16 xe_name_offset; /* byte offset from the 1st etnry in the local + local xattr storage(inode, xattr block or + xattr bucket). */ + __u8 xe_name_len; /* xattr name len, does't include prefix. */ + __u8 xe_type; /* the low 7 bits indicates the name prefix's + * type and the highest 1 bits indicate whether + * the EA is stored in the local storage. */ + __le64 xe_value_size; /* real xattr value length. */ +}; + +/* + * On disk structure for xattr header. + * + * One ocfs2_xattr_header describes how many ocfs2_xattr_entry records in + * the local xattr storage. + */ +struct ocfs2_xattr_header { + __le16 xh_count; /* contains the count of how + many records are in the + local xattr storage. */ + __le16 xh_reserved1; + __le32 xh_reserved2; + __le64 xh_csum; + struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */ +}; + +/* + * On disk structure for xattr value root. + * + * It is used when one extended attribute's size is larger, and we will save it + * in an outside cluster. It will stored in a b-tree like file content. + */ +struct ocfs2_xattr_value_root { +/*00*/ __le32 xr_clusters; /* clusters covered by xattr value. */ + __le32 xr_reserved0; + __le64 xr_last_eb_blk; /* Pointer to last extent block */ +/*10*/ struct ocfs2_extent_list xr_list; /* Extent record list */ +}; + +/* + * On disk structure for xattr tree root. + * + * It is used when there are too many extended attributes for one file. These + * attributes will be organized and stored in an indexed-btree. + */ +struct ocfs2_xattr_tree_root { +/*00*/ __le32 xt_clusters; /* clusters covered by xattr. */ + __le32 xt_reserved0; + __le64 xt_last_eb_blk; /* Pointer to last extent block */ +/*10*/ struct ocfs2_extent_list xt_list; /* Extent record list */ +}; + +#define OCFS2_XATTR_INDEXED 0x1 + +/* + * On disk structure for xattr block. + */ +struct ocfs2_xattr_block { +/*00*/ __u8 xb_signature[8]; /* Signature for verification */ + __le16 xb_suballoc_slot; /* Slot suballocator this + block belongs to. */ + __le16 xb_suballoc_bit; /* Bit offset in suballocator + block group */ + __le32 xb_fs_generation; /* Must match super block */ +/*10*/ __le64 xb_blkno; /* Offset on disk, in blocks */ + __le64 xb_csum; +/*20*/ __le16 xb_flags; /* Indicates whether this block contains + real xattr or a xattr tree. */ + __le16 xb_reserved0; + __le32 xb_reserved1; + __le64 xb_reserved2; +/*30*/ union { + struct ocfs2_xattr_header xb_header; /* xattr header if this + block contains xattr */ + struct ocfs2_xattr_tree_root xb_root;/* xattr tree root if this + block cotains xattr + tree. */ + } xb_attrs; +}; + +#define OCFS2_XATTR_ENTRY_LOCAL 0x80 +#define OCFS2_XATTR_TYPE_MASK 0x7F +static inline void ocfs2_xattr_set_local(struct ocfs2_xattr_entry *xe, + int local) +{ + if (local) + xe->xe_type |= OCFS2_XATTR_ENTRY_LOCAL; + else + xe->xe_type &= ~OCFS2_XATTR_ENTRY_LOCAL; +} + +static inline int ocfs2_xattr_is_local(struct ocfs2_xattr_entry *xe) +{ + return xe->xe_type & OCFS2_XATTR_ENTRY_LOCAL; +} + +static inline void ocfs2_xattr_set_type(struct ocfs2_xattr_entry *xe, int type) +{ + xe->xe_type |= type & OCFS2_XATTR_TYPE_MASK; +} + +static inline int ocfs2_xattr_get_type(struct ocfs2_xattr_entry *xe) +{ + return xe->xe_type & OCFS2_XATTR_TYPE_MASK; +} + #ifdef __KERNEL__ static inline int ocfs2_fast_symlink_chars(struct super_block *sb) { -- cgit v1.1 From fdd77704a8b4666a32120fcd1e4a9fedaf3263d8 Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Mon, 18 Aug 2008 17:08:55 +0800 Subject: ocfs2: reserve inline space for extended attribute Add the structures and helper functions we want for handling inline extended attributes. We also update the inline-data handlers so that they properly function in the event that we have both inline data and inline attributes sharing an inode block. Signed-off-by: Tiger Yang Signed-off-by: Mark Fasheh --- fs/ocfs2/ocfs2_fs.h | 46 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) (limited to 'fs/ocfs2/ocfs2_fs.h') diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 1b46505..1055ba0 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -300,6 +300,12 @@ struct ocfs2_new_group_input { */ #define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8 +/* + * Inline extended attribute size (in bytes) + * The value chosen should be aligned to 16 byte boundaries. + */ +#define OCFS2_MIN_XATTR_INLINE_SIZE 256 + struct ocfs2_system_inode_info { char *si_name; int si_iflags; @@ -622,7 +628,8 @@ struct ocfs2_dinode { belongs to */ __le16 i_suballoc_bit; /* Bit offset in suballocator block group */ -/*10*/ __le32 i_reserved0; +/*10*/ __le16 i_reserved0; + __le16 i_xattr_inline_size; __le32 i_clusters; /* Cluster count */ __le32 i_uid; /* Owner UID */ __le32 i_gid; /* Owning GID */ @@ -641,11 +648,12 @@ struct ocfs2_dinode { __le32 i_atime_nsec; __le32 i_ctime_nsec; __le32 i_mtime_nsec; - __le32 i_attr; +/*70*/ __le32 i_attr; __le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL was set in i_flags */ __le16 i_dyn_features; -/*70*/ __le64 i_reserved2[8]; + __le64 i_xattr_loc; +/*80*/ __le64 i_reserved2[7]; /*B8*/ union { __le64 i_pad1; /* Generic way to refer to this 64bit union */ @@ -846,6 +854,20 @@ static inline int ocfs2_max_inline_data(struct super_block *sb) offsetof(struct ocfs2_dinode, id2.i_data.id_data); } +static inline int ocfs2_max_inline_data_with_xattr(struct super_block *sb, + struct ocfs2_dinode *di) +{ + unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size); + + if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL) + return sb->s_blocksize - + offsetof(struct ocfs2_dinode, id2.i_data.id_data) - + xattrsize; + else + return sb->s_blocksize - + offsetof(struct ocfs2_dinode, id2.i_data.id_data); +} + static inline int ocfs2_extent_recs_per_inode(struct super_block *sb) { int size; @@ -856,6 +878,24 @@ static inline int ocfs2_extent_recs_per_inode(struct super_block *sb) return size / sizeof(struct ocfs2_extent_rec); } +static inline int ocfs2_extent_recs_per_inode_with_xattr( + struct super_block *sb, + struct ocfs2_dinode *di) +{ + int size; + unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size); + + if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL) + size = sb->s_blocksize - + offsetof(struct ocfs2_dinode, id2.i_list.l_recs) - + xattrsize; + else + size = sb->s_blocksize - + offsetof(struct ocfs2_dinode, id2.i_list.l_recs); + + return size / sizeof(struct ocfs2_extent_rec); +} + static inline int ocfs2_chain_recs_per_inode(struct super_block *sb) { int size; -- cgit v1.1 From cf1d6c763fbcb115263114302485ad17e7933d87 Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Mon, 18 Aug 2008 17:11:00 +0800 Subject: ocfs2: Add extended attribute support This patch implements storing extended attributes both in inode or a single external block. We only store EA's in-inode when blocksize > 512 or that inode block has free space for it. When an EA's value is larger than 80 bytes, we will store the value via b-tree outside inode or block. Signed-off-by: Tiger Yang Signed-off-by: Mark Fasheh --- fs/ocfs2/ocfs2_fs.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2/ocfs2_fs.h') diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 1055ba0..98e1f8b 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -570,7 +570,7 @@ struct ocfs2_super_block { /*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts before tunefs required */ __le16 s_tunefs_flag; - __le32 s_reserved1; + __le32 s_uuid_hash; /* hash value of uuid */ __le64 s_first_cluster_group; /* Block offset of 1st cluster * group header */ /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ @@ -787,7 +787,11 @@ struct ocfs2_xattr_tree_root { /*10*/ struct ocfs2_extent_list xt_list; /* Extent record list */ }; -#define OCFS2_XATTR_INDEXED 0x1 +#define OCFS2_XATTR_INDEXED 0x1 +#define OCFS2_HASH_SHIFT 5 +#define OCFS2_XATTR_ROUND 3 +#define OCFS2_XATTR_SIZE(size) (((size) + OCFS2_XATTR_ROUND) & \ + ~(OCFS2_XATTR_ROUND)) /* * On disk structure for xattr block. -- cgit v1.1 From 0c044f0b24b9128ba8c297149d88bd81f2e36af3 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 18 Aug 2008 17:38:50 +0800 Subject: ocfs2: Add xattr bucket iteration for large numbers of EAs Ocfs2 breaks up xattr index tree leaves into 4k regions, called buckets. Attributes are stored within a given bucket, depending on hash value. After a discussion with Mark, we decided that the per-bucket index (xe_entry[]) would only exist in the 1st block of a bucket. Likewise, name/value pairs will not straddle more than one block. This allows the majority of operations to work directly on the buffer heads in a leaf block. This patch adds code to iterate the buckets in an EA. A new abstration of ocfs2_xattr_bucket is added. It records the bhs in this bucket and ocfs2_xattr_header. This keeps the code neat, improving readibility. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/ocfs2_fs.h | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2/ocfs2_fs.h') diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 98e1f8b..8d5e72f 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -755,8 +755,13 @@ struct ocfs2_xattr_header { __le16 xh_count; /* contains the count of how many records are in the local xattr storage. */ - __le16 xh_reserved1; - __le32 xh_reserved2; + __le16 xh_free_start; /* current offset for storing + xattr. */ + __le16 xh_name_value_len; /* total length of name/value + length in this bucket. */ + __le16 xh_num_buckets; /* bucket nums in one extent + record, only valid in the + first bucket. */ __le64 xh_csum; struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */ }; @@ -793,6 +798,10 @@ struct ocfs2_xattr_tree_root { #define OCFS2_XATTR_SIZE(size) (((size) + OCFS2_XATTR_ROUND) & \ ~(OCFS2_XATTR_ROUND)) +#define OCFS2_XATTR_BUCKET_SIZE 4096 +#define OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET (OCFS2_XATTR_BUCKET_SIZE \ + / OCFS2_MIN_BLOCKSIZE) + /* * On disk structure for xattr block. */ @@ -963,6 +972,17 @@ static inline u64 ocfs2_backup_super_blkno(struct super_block *sb, int index) return 0; } + +static inline u16 ocfs2_xattr_recs_per_xb(struct super_block *sb) +{ + int size; + + size = sb->s_blocksize - + offsetof(struct ocfs2_xattr_block, + xb_attrs.xb_root.xt_list.l_recs); + + return size / sizeof(struct ocfs2_extent_rec); +} #else static inline int ocfs2_fast_symlink_chars(int blocksize) { @@ -1046,6 +1066,17 @@ static inline uint64_t ocfs2_backup_super_blkno(int blocksize, int index) return 0; } + +static inline int ocfs2_xattr_recs_per_xb(int blocksize) +{ + int size; + + size = blocksize - + offsetof(struct ocfs2_xattr_block, + xb_attrs.xb_root.xt_list.l_recs); + + return size / sizeof(struct ocfs2_extent_rec); +} #endif /* __KERNEL__ */ -- cgit v1.1 From 8154da3d2114241cf3edb108b43e2172be86d483 Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Mon, 18 Aug 2008 17:11:46 +0800 Subject: ocfs2: Add incompatible flag for extended attribute This patch adds the s_incompat flag for extended attribute support. This helps us ensure that older versions of Ocfs2 or ocfs2-tools will not be able to mount a volume with xattr support. Signed-off-by: Tiger Yang Signed-off-by: Mark Fasheh --- fs/ocfs2/ocfs2_fs.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'fs/ocfs2/ocfs2_fs.h') diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 8d5e72f..f24ce3d 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -91,7 +91,8 @@ | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ - | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK) + | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \ + | OCFS2_FEATURE_INCOMPAT_XATTR) #define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN /* @@ -128,10 +129,6 @@ /* Support for data packed into inode blocks */ #define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040 -/* Support for the extended slot map */ -#define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100 - - /* * Support for alternate, userspace cluster stacks. If set, the superblock * field s_cluster_info contains a tag for the alternate stack in use as @@ -143,6 +140,12 @@ */ #define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080 +/* Support for the extended slot map */ +#define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100 + +/* Support for extended attributes */ +#define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200 + /* * backup superblock flag is used to indicate that this volume * has backup superblocks. @@ -578,7 +581,11 @@ struct ocfs2_super_block { /*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace stack. Only valid with INCOMPAT flag. */ -/*B8*/ __le64 s_reserved2[17]; /* Fill out superblock */ +/*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size + for this fs*/ + __le16 s_reserved0; + __le32 s_reserved1; +/*C0*/ __le64 s_reserved2[16]; /* Fill out superblock */ /*140*/ /* -- cgit v1.1