From d9c740d2253e75db8cef8f87a3125c450f3ebd82 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 28 Jan 2010 11:58:08 +0200 Subject: exofs: Define on-disk per-inode optional layout attribute * Layouts describe the way a file is spread on multiple devices. The layout information is stored in the objects attribute introduced in this patch. * There can be multiple generating function for the layout. Currently defined: - No attribute present - use below moving-window on global device table, all devices. (This is the only one currently used in exofs) - an obj_id generated moving window - the obj_id is a randomizing factor in the otherwise global map layout. - An explicit layout stored, including a data_map and a device index list. - More might be defined in future ... * There are two attributes defined of the same structure: A-data-files-layout - This layout is used by data-files. If present at a directory, all files of that directory will be created with this layout. A-meta-data-layout - This layout is used by a directory and other meta-data information. Also inherited at creation of subdirectories. * At creation time inodes are created with the layout specified above. A usermode utility may change the creation layout on a give directory or file. Which in the case of directories, will also apply to newly created files/subdirectories, children of that directory. In the simple unaltered case of a newly created exofs, no layout attributes are present, and all layouts adhere to the layout specified at the device-table. * In case of a future file system loaded in an old exofs-driver. At iget(), the generating_function is inspected and if not supported will return an IO error to the application and the inode will not be loaded. So not to damage any data. Note: After this patch we do not yet support any type of layout only the RAID0 patch that enables striping at the super-block level will add support for RAID0 layouts above. This way we are past and future compatible and fully bisectable. * Access to the device table is done by an accessor since it will change according to above information. Signed-off-by: Boaz Harrosh --- fs/exofs/common.h | 39 ++++++++++++++++++++++++++++++++++++++ fs/exofs/exofs.h | 6 ++++++ fs/exofs/inode.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++----- fs/exofs/ios.c | 23 ++++++++++++++++++----- 4 files changed, 114 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/exofs/common.h b/fs/exofs/common.h index b1b178e..f0d5203 100644 --- a/fs/exofs/common.h +++ b/fs/exofs/common.h @@ -55,6 +55,8 @@ /* exofs Application specific page/attribute */ # define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3) # define EXOFS_ATTR_INODE_DATA 1 +# define EXOFS_ATTR_INODE_FILE_LAYOUT 2 +# define EXOFS_ATTR_INODE_DIR_LAYOUT 3 /* * The maximum number of files we can have is limited by the size of the @@ -206,4 +208,41 @@ enum { (((name_len) + offsetof(struct exofs_dir_entry, name) + \ EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND) +/* + * The on-disk (optional) layout structure. + * sits in an EXOFS_ATTR_INODE_FILE_LAYOUT or EXOFS_ATTR_INODE_DIR_LAYOUT + * attribute, attached to any inode, usually to a directory. + */ + +enum exofs_inode_layout_gen_functions { + LAYOUT_MOVING_WINDOW = 0, + LAYOUT_IMPLICT = 1, +}; + +struct exofs_on_disk_inode_layout { + __le16 gen_func; /* One of enum exofs_inode_layout_gen_functions */ + __le16 pad; + union { + /* gen_func == LAYOUT_MOVING_WINDOW (default) */ + struct exofs_layout_sliding_window { + __le32 num_devices; /* first n devices in global-table*/ + } sliding_window __packed; + + /* gen_func == LAYOUT_IMPLICT */ + struct exofs_layout_implict_list { + struct exofs_dt_data_map data_map; + /* Variable array of size data_map.cb_num_comps. These + * are device indexes of the devices in the global table + */ + __le32 dev_indexes[]; + } implict __packed; + }; +} __packed; + +static inline size_t exofs_on_disk_inode_layout_size(unsigned max_devs) +{ + return sizeof(struct exofs_on_disk_inode_layout) + + max_devs * sizeof(__le32); +} + #endif /*ifndef __EXOFS_COM_H__*/ diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 33c6856..09e3319 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -186,6 +186,12 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode) } /* + * Given a layout, object_number and stripe_index return the associated global + * dev_index + */ +unsigned exofs_layout_od_id(struct exofs_layout *layout, + osd_id obj_no, unsigned layout_index); +/* * Maximum count of links to a file */ #define EXOFS_LINK_MAX 32000 diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 03189a9..0163546 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -859,6 +859,15 @@ int exofs_setattr(struct dentry *dentry, struct iattr *iattr) return error; } +static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF( + EXOFS_APAGE_FS_DATA, + EXOFS_ATTR_INODE_FILE_LAYOUT, + 0); +static const struct osd_attr g_attr_inode_dir_layout = ATTR_DEF( + EXOFS_APAGE_FS_DATA, + EXOFS_ATTR_INODE_DIR_LAYOUT, + 0); + /* * Read an inode from the OSD, and return it as is. We also return the size * attribute in the 'obj_size' argument. @@ -867,11 +876,16 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, struct exofs_fcb *inode, uint64_t *obj_size) { struct exofs_sb_info *sbi = sb->s_fs_info; - struct osd_attr attrs[2]; + struct osd_attr attrs[] = { + [0] = g_attr_inode_data, + [1] = g_attr_inode_file_layout, + [2] = g_attr_inode_dir_layout, + [3] = g_attr_logical_length, + }; struct exofs_io_state *ios; + struct exofs_on_disk_inode_layout *layout; int ret; - *obj_size = ~0; ret = exofs_get_io_state(&sbi->layout, &ios); if (unlikely(ret)) { EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); @@ -882,8 +896,9 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, exofs_make_credential(oi->i_cred, &ios->obj); ios->cred = oi->i_cred; - attrs[0] = g_attr_inode_data; - attrs[1] = g_attr_logical_length; + attrs[1].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs); + attrs[2].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs); + ios->in_attr = attrs; ios->in_attr_len = ARRAY_SIZE(attrs); @@ -901,11 +916,42 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, ret = extract_attr_from_ios(ios, &attrs[1]); if (ret) { + EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__); + goto out; + } + if (attrs[1].len) { + layout = attrs[1].val_ptr; + if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) { + EXOFS_ERR("%s: unsupported files layout %d\n", + __func__, layout->gen_func); + ret = -ENOTSUPP; + goto out; + } + } + + ret = extract_attr_from_ios(ios, &attrs[2]); + if (ret) { + EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__); + goto out; + } + if (attrs[2].len) { + layout = attrs[2].val_ptr; + if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) { + EXOFS_ERR("%s: unsupported meta-data layout %d\n", + __func__, layout->gen_func); + ret = -ENOTSUPP; + goto out; + } + } + + *obj_size = ~0; + ret = extract_attr_from_ios(ios, &attrs[3]); + if (ret) { EXOFS_ERR("%s: extract_attr of logical_length failed\n", __func__); goto out; } - *obj_size = get_unaligned_be64(attrs[1].val_ptr); + *obj_size = get_unaligned_be64(attrs[3].val_ptr); out: exofs_put_io_state(ios); diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index 4f67931..2b81f99 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c @@ -107,6 +107,19 @@ void exofs_put_io_state(struct exofs_io_state *ios) } } +unsigned exofs_layout_od_id(struct exofs_layout *layout, + osd_id obj_no, unsigned layout_index) +{ + return layout_index; +} + +static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios, + unsigned layout_index) +{ + return ios->layout->s_ods[ + exofs_layout_od_id(ios->layout, ios->obj.id, layout_index)]; +} + static void _sync_done(struct exofs_io_state *ios, void *p) { struct completion *waiting = p; @@ -242,7 +255,7 @@ int exofs_sbi_create(struct exofs_io_state *ios) for (i = 0; i < ios->layout->s_numdevs; i++) { struct osd_request *or; - or = osd_start_request(ios->layout->s_ods[i], GFP_KERNEL); + or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; @@ -266,7 +279,7 @@ int exofs_sbi_remove(struct exofs_io_state *ios) for (i = 0; i < ios->layout->s_numdevs; i++) { struct osd_request *or; - or = osd_start_request(ios->layout->s_ods[i], GFP_KERNEL); + or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; @@ -290,7 +303,7 @@ int exofs_sbi_write(struct exofs_io_state *ios) for (i = 0; i < ios->layout->s_numdevs; i++) { struct osd_request *or; - or = osd_start_request(ios->layout->s_ods[i], GFP_KERNEL); + or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; @@ -361,7 +374,7 @@ int exofs_sbi_read(struct exofs_io_state *ios) unsigned first_dev = (unsigned)ios->obj.id; first_dev %= ios->layout->s_numdevs; - or = osd_start_request(ios->layout->s_ods[first_dev], GFP_KERNEL); + or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); return -ENOMEM; @@ -442,7 +455,7 @@ int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) for (i = 0; i < sbi->layout.s_numdevs; i++) { struct osd_request *or; - or = osd_start_request(sbi->layout.s_ods[i], GFP_KERNEL); + or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; -- cgit v1.1