From a2daff6803a384ce065e3681a2affea1da59c5f5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 31 May 2011 14:09:00 -0700 Subject: fuse: fix non-ANSI void function notation Fix void function parameter list sparse warning: fs/fuse/inode.c:74:44: warning: non-ANSI function declaration of function 'fuse_alloc_forget' Signed-off-by: Randy Dunlap Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index cc6ec4b..5354906e 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -71,7 +71,7 @@ struct fuse_mount_data { unsigned blksize; }; -struct fuse_forget_link *fuse_alloc_forget() +struct fuse_forget_link *fuse_alloc_forget(void) { return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL); } -- cgit v1.1 From afe48049ab1d0ca83afe45f9d5116bf4507741eb Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 17 Jun 2011 08:21:10 +0000 Subject: ARM: mach-shmobile: sh7372: Add USB-DMAC support Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/clock-sh7372.c | 5 +- arch/arm/mach-shmobile/include/mach/sh7372.h | 4 + arch/arm/mach-shmobile/setup-sh7372.c | 146 +++++++++++++++++++++++++++ 3 files changed, 154 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index c0800d8..c239ab1 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -509,7 +509,7 @@ enum { MSTP001, MSTP118, MSTP117, MSTP116, MSTP113, MSTP106, MSTP101, MSTP100, MSTP223, - MSTP218, MSTP217, MSTP216, + MSTP214, MSTP218, MSTP217, MSTP216, MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200, MSTP329, MSTP328, MSTP323, MSTP322, MSTP314, MSTP313, MSTP312, MSTP423, MSTP415, MSTP413, MSTP411, MSTP410, MSTP406, MSTP403, @@ -538,6 +538,7 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP218] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 18, 0), /* DMAC1 */ [MSTP217] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 17, 0), /* DMAC2 */ [MSTP216] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 16, 0), /* DMAC3 */ + [MSTP214] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 14, 0), /* USBDMAC */ [MSTP207] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 7, 0), /* SCIFA5 */ [MSTP206] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 6, 0), /* SCIFB */ [MSTP204] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 4, 0), /* SCIFA0 */ @@ -633,6 +634,8 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("sh-dma-engine.0", &mstp_clks[MSTP218]), /* DMAC1 */ CLKDEV_DEV_ID("sh-dma-engine.1", &mstp_clks[MSTP217]), /* DMAC2 */ CLKDEV_DEV_ID("sh-dma-engine.2", &mstp_clks[MSTP216]), /* DMAC3 */ + CLKDEV_DEV_ID("sh-dma-engine.3", &mstp_clks[MSTP214]), /* USB-DMAC0 */ + CLKDEV_DEV_ID("sh-dma-engine.4", &mstp_clks[MSTP214]), /* USB-DMAC1 */ CLKDEV_DEV_ID("sh-sci.5", &mstp_clks[MSTP207]), /* SCIFA5 */ CLKDEV_DEV_ID("sh-sci.6", &mstp_clks[MSTP206]), /* SCIFB */ CLKDEV_DEV_ID("sh-sci.0", &mstp_clks[MSTP204]), /* SCIFA0 */ diff --git a/arch/arm/mach-shmobile/include/mach/sh7372.h b/arch/arm/mach-shmobile/include/mach/sh7372.h index df20d76..51db9d3 100644 --- a/arch/arm/mach-shmobile/include/mach/sh7372.h +++ b/arch/arm/mach-shmobile/include/mach/sh7372.h @@ -458,6 +458,10 @@ enum { SHDMA_SLAVE_SDHI2_TX, SHDMA_SLAVE_MMCIF_RX, SHDMA_SLAVE_MMCIF_TX, + SHDMA_SLAVE_USB0_TX, + SHDMA_SLAVE_USB0_RX, + SHDMA_SLAVE_USB1_TX, + SHDMA_SLAVE_USB1_RX, }; extern struct clk sh7372_extal1_clk; diff --git a/arch/arm/mach-shmobile/setup-sh7372.c b/arch/arm/mach-shmobile/setup-sh7372.c index cd807ee..f2a58d4 100644 --- a/arch/arm/mach-shmobile/setup-sh7372.c +++ b/arch/arm/mach-shmobile/setup-sh7372.c @@ -602,6 +602,150 @@ static struct platform_device dma2_device = { }, }; +/* + * USB-DMAC + */ + +unsigned int usbts_shift[] = {3, 4, 5}; + +enum { + XMIT_SZ_8BYTE = 0, + XMIT_SZ_16BYTE = 1, + XMIT_SZ_32BYTE = 2, +}; + +#define USBTS_INDEX2VAL(i) (((i) & 3) << 6) + +static const struct sh_dmae_channel sh7372_usb_dmae_channels[] = { + { + .offset = 0, + }, { + .offset = 0x20, + }, +}; + +/* USB DMAC0 */ +static const struct sh_dmae_slave_config sh7372_usb_dmae0_slaves[] = { + { + .slave_id = SHDMA_SLAVE_USB0_TX, + .chcr = USBTS_INDEX2VAL(XMIT_SZ_8BYTE), + }, { + .slave_id = SHDMA_SLAVE_USB0_RX, + .chcr = USBTS_INDEX2VAL(XMIT_SZ_8BYTE), + }, +}; + +static struct sh_dmae_pdata usb_dma0_platform_data = { + .slave = sh7372_usb_dmae0_slaves, + .slave_num = ARRAY_SIZE(sh7372_usb_dmae0_slaves), + .channel = sh7372_usb_dmae_channels, + .channel_num = ARRAY_SIZE(sh7372_usb_dmae_channels), + .ts_low_shift = 6, + .ts_low_mask = 0xc0, + .ts_high_shift = 0, + .ts_high_mask = 0, + .ts_shift = usbts_shift, + .ts_shift_num = ARRAY_SIZE(usbts_shift), + .dmaor_init = DMAOR_DME, + .chcr_offset = 0x14, + .chcr_ie_bit = 1 << 5, + .dmaor_is_32bit = 1, + .needs_tend_set = 1, + .no_dmars = 1, +}; + +static struct resource sh7372_usb_dmae0_resources[] = { + { + /* Channel registers and DMAOR */ + .start = 0xe68a0020, + .end = 0xe68a0064 - 1, + .flags = IORESOURCE_MEM, + }, + { + /* VCR/SWR/DMICR */ + .start = 0xe68a0000, + .end = 0xe68a0014 - 1, + .flags = IORESOURCE_MEM, + }, + { + /* IRQ for channels */ + .start = evt2irq(0x0a00), + .end = evt2irq(0x0a00), + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device usb_dma0_device = { + .name = "sh-dma-engine", + .id = 3, + .resource = sh7372_usb_dmae0_resources, + .num_resources = ARRAY_SIZE(sh7372_usb_dmae0_resources), + .dev = { + .platform_data = &usb_dma0_platform_data, + }, +}; + +/* USB DMAC1 */ +static const struct sh_dmae_slave_config sh7372_usb_dmae1_slaves[] = { + { + .slave_id = SHDMA_SLAVE_USB1_TX, + .chcr = USBTS_INDEX2VAL(XMIT_SZ_8BYTE), + }, { + .slave_id = SHDMA_SLAVE_USB1_RX, + .chcr = USBTS_INDEX2VAL(XMIT_SZ_8BYTE), + }, +}; + +static struct sh_dmae_pdata usb_dma1_platform_data = { + .slave = sh7372_usb_dmae1_slaves, + .slave_num = ARRAY_SIZE(sh7372_usb_dmae1_slaves), + .channel = sh7372_usb_dmae_channels, + .channel_num = ARRAY_SIZE(sh7372_usb_dmae_channels), + .ts_low_shift = 6, + .ts_low_mask = 0xc0, + .ts_high_shift = 0, + .ts_high_mask = 0, + .ts_shift = usbts_shift, + .ts_shift_num = ARRAY_SIZE(usbts_shift), + .dmaor_init = DMAOR_DME, + .chcr_offset = 0x14, + .chcr_ie_bit = 1 << 5, + .dmaor_is_32bit = 1, + .needs_tend_set = 1, + .no_dmars = 1, +}; + +static struct resource sh7372_usb_dmae1_resources[] = { + { + /* Channel registers and DMAOR */ + .start = 0xe68c0020, + .end = 0xe68c0064 - 1, + .flags = IORESOURCE_MEM, + }, + { + /* VCR/SWR/DMICR */ + .start = 0xe68c0000, + .end = 0xe68c0014 - 1, + .flags = IORESOURCE_MEM, + }, + { + /* IRQ for channels */ + .start = evt2irq(0x1d00), + .end = evt2irq(0x1d00), + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device usb_dma1_device = { + .name = "sh-dma-engine", + .id = 4, + .resource = sh7372_usb_dmae1_resources, + .num_resources = ARRAY_SIZE(sh7372_usb_dmae1_resources), + .dev = { + .platform_data = &usb_dma1_platform_data, + }, +}; + /* VPU */ static struct uio_info vpu_platform_data = { .name = "VPU5HG", @@ -829,6 +973,8 @@ static struct platform_device *sh7372_late_devices[] __initdata = { &dma0_device, &dma1_device, &dma2_device, + &usb_dma0_device, + &usb_dma1_device, &vpu_device, &veu0_device, &veu1_device, -- cgit v1.1 From 0ed61fc9da59ea45d56a6928653691cef14bab9b Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 30 Jun 2011 09:22:50 +0000 Subject: ARM: mach-shmobile: Use CMT2 for timer on sh7372 Switch the sh7372 CPU support to use CMT2 instead of CMT1 for system timer. CMT1 is located in the A3SP power domain while CMT2 is located in the always-on power domain C5. This improves our PM situation - with CMT2 as timer we can power down A3SP and still access the timer. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/clock-sh7372.c | 8 ++++---- arch/arm/mach-shmobile/setup-sh7372.c | 30 +++++++++++++++--------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index c0800d8..7fb1d2e 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -511,8 +511,8 @@ enum { MSTP001, MSTP223, MSTP218, MSTP217, MSTP216, MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200, - MSTP329, MSTP328, MSTP323, MSTP322, MSTP314, MSTP313, MSTP312, - MSTP423, MSTP415, MSTP413, MSTP411, MSTP410, MSTP406, MSTP403, + MSTP328, MSTP323, MSTP322, MSTP314, MSTP313, MSTP312, + MSTP423, MSTP415, MSTP413, MSTP411, MSTP410, MSTP406, MSTP403, MSTP400, MSTP_NR }; #define MSTP(_parent, _reg, _bit, _flags) \ @@ -545,7 +545,6 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP202] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 2, 0), /* SCIFA2 */ [MSTP201] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 1, 0), /* SCIFA3 */ [MSTP200] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 0, 0), /* SCIFA4 */ - [MSTP329] = MSTP(&r_clk, SMSTPCR3, 29, 0), /* CMT10 */ [MSTP328] = MSTP(&div6_clks[DIV6_SPU], SMSTPCR3, 28, 0), /* FSI2 */ [MSTP323] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR3, 23, 0), /* IIC1 */ [MSTP322] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR3, 22, 0), /* USB0 */ @@ -559,6 +558,7 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP410] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR4, 10, 0), /* IIC4 */ [MSTP406] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR4, 6, 0), /* USB1 */ [MSTP403] = MSTP(&r_clk, SMSTPCR4, 3, 0), /* KEYSC */ + [MSTP400] = MSTP(&r_clk, SMSTPCR4, 0, 0), /* CMT2 */ }; #define CLKDEV_CON_ID(_id, _clk) { .con_id = _id, .clk = _clk } @@ -640,7 +640,6 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("sh-sci.2", &mstp_clks[MSTP202]), /* SCIFA2 */ CLKDEV_DEV_ID("sh-sci.3", &mstp_clks[MSTP201]), /* SCIFA3 */ CLKDEV_DEV_ID("sh-sci.4", &mstp_clks[MSTP200]), /* SCIFA4 */ - CLKDEV_DEV_ID("sh_cmt.10", &mstp_clks[MSTP329]), /* CMT10 */ CLKDEV_DEV_ID("sh_fsi2", &mstp_clks[MSTP328]), /* FSI2 */ CLKDEV_DEV_ID("i2c-sh_mobile.1", &mstp_clks[MSTP323]), /* IIC1 */ CLKDEV_DEV_ID("r8a66597_hcd.0", &mstp_clks[MSTP322]), /* USB0 */ @@ -658,6 +657,7 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("r8a66597_udc.1", &mstp_clks[MSTP406]), /* USB1 */ CLKDEV_DEV_ID("renesas_usbhs.1", &mstp_clks[MSTP406]), /* USB1 */ CLKDEV_DEV_ID("sh_keysc.0", &mstp_clks[MSTP403]), /* KEYSC */ + CLKDEV_DEV_ID("sh_cmt.2", &mstp_clks[MSTP400]), /* CMT2 */ CLKDEV_ICK_ID("ick", "sh-mobile-hdmi", &div6_reparent_clks[DIV6_HDMI]), CLKDEV_ICK_ID("icka", "sh_fsi2", &div6_reparent_clks[DIV6_FSIA]), diff --git a/arch/arm/mach-shmobile/setup-sh7372.c b/arch/arm/mach-shmobile/setup-sh7372.c index cd807ee..cb8ac78 100644 --- a/arch/arm/mach-shmobile/setup-sh7372.c +++ b/arch/arm/mach-shmobile/setup-sh7372.c @@ -169,35 +169,35 @@ static struct platform_device scif6_device = { }; /* CMT */ -static struct sh_timer_config cmt10_platform_data = { - .name = "CMT10", - .channel_offset = 0x10, - .timer_bit = 0, +static struct sh_timer_config cmt2_platform_data = { + .name = "CMT2", + .channel_offset = 0x40, + .timer_bit = 5, .clockevent_rating = 125, .clocksource_rating = 125, }; -static struct resource cmt10_resources[] = { +static struct resource cmt2_resources[] = { [0] = { - .name = "CMT10", - .start = 0xe6138010, - .end = 0xe613801b, + .name = "CMT2", + .start = 0xe6130040, + .end = 0xe613004b, .flags = IORESOURCE_MEM, }, [1] = { - .start = evt2irq(0x0b00), /* CMT1_CMT10 */ + .start = evt2irq(0x0b80), /* CMT2 */ .flags = IORESOURCE_IRQ, }, }; -static struct platform_device cmt10_device = { +static struct platform_device cmt2_device = { .name = "sh_cmt", - .id = 10, + .id = 2, .dev = { - .platform_data = &cmt10_platform_data, + .platform_data = &cmt2_platform_data, }, - .resource = cmt10_resources, - .num_resources = ARRAY_SIZE(cmt10_resources), + .resource = cmt2_resources, + .num_resources = ARRAY_SIZE(cmt2_resources), }; /* TMU */ @@ -818,7 +818,7 @@ static struct platform_device *sh7372_early_devices[] __initdata = { &scif4_device, &scif5_device, &scif6_device, - &cmt10_device, + &cmt2_device, &tmu00_device, &tmu01_device, }; -- cgit v1.1 From b8e513a2ecafb5bb068c00be98d584871afcd4c3 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sat, 9 Jul 2011 21:11:14 +0000 Subject: ARM: static should be at beginning of declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure that the 'static' keywork is at the beginning of declaration for arch/arm/mach-shmobile/board-ap4evb.c This gets rid of warnings like warning: ‘static’ is not at beginning of declaration when building with -Wold-style-declaration (and/or -Wextra which also enables it). Signed-off-by: Jesper Juhl Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-ap4evb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index 803bc6e..bb4e084 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -443,7 +443,7 @@ static struct platform_device usb1_host_device = { .resource = usb1_host_resources, }; -const static struct fb_videomode ap4evb_lcdc_modes[] = { +static const struct fb_videomode ap4evb_lcdc_modes[] = { { #ifdef CONFIG_AP4EVB_QHD .name = "R63302(QHD)", -- cgit v1.1 From 196cfe2ae8fcdc03b3c7d627e7dfe8c0ce7229f9 Mon Sep 17 00:00:00 2001 From: Stefan Bader Date: Thu, 14 Jul 2011 15:30:22 +0200 Subject: xen-blkfront: Drop name and minor adjustments for emulated scsi devices These were intended to avoid the namespace clash when representing emulated IDE and SCSI devices. However that seems to confuse users more than expected (a disk defined as sda becomes xvde). So for now go back to the scheme which does no adjustments. This will break when mixing IDE and SCSI names in the configuration of guests but should be by now expected. Acked-by: Stefano Stabellini Signed-off-by: Stefan Bader Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index b536a9c..238b941 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -123,8 +123,8 @@ static DEFINE_SPINLOCK(minor_lock); #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) #define EMULATED_HD_DISK_MINOR_OFFSET (0) #define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256) -#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16)) -#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4) +#define EMULATED_SD_DISK_MINOR_OFFSET (0) +#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256) #define DEV_NAME "xvd" /* name in /dev */ -- cgit v1.1 From 89153b5cae9f40c224a5d321665a97bf14220c2c Mon Sep 17 00:00:00 2001 From: Stefan Bader Date: Thu, 14 Jul 2011 15:30:37 +0200 Subject: xen-blkfront: Fix one off warning about name clash Avoid telling users to use xvde and onwards when using xvde. Acked-by: Stefano Stabellini Signed-off-by: Stefan Bader Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 238b941..9ea8c25 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -529,7 +529,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, minor = BLKIF_MINOR_EXT(info->vdevice); nr_parts = PARTS_PER_EXT_DISK; offset = minor / nr_parts; - if (xen_hvm_domain() && offset <= EMULATED_HD_DISK_NAME_OFFSET + 4) + if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4) printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with " "emulated IDE disks,\n\t choose an xvd device name" "from xvde on\n", info->vdevice); -- cgit v1.1 From 3f7e5e2423f6233f7665d54061ba7761ca90cf52 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 13 Jul 2011 07:59:48 +0000 Subject: clocksource: sh_cmt: wait for CMCNT on init V2 Add code to the CMT driver to wait for CMCNT V2. This to let the register value settle before starting the timer channel. Makes the driver more robust. Needed for CMT2 on sh7372 and certain CMT channels on sh73a0. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/clocksource/sh_cmt.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index dc7c033..32a77be 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -150,13 +151,13 @@ static void sh_cmt_start_stop_ch(struct sh_cmt_priv *p, int start) static int sh_cmt_enable(struct sh_cmt_priv *p, unsigned long *rate) { - int ret; + int k, ret; /* enable clock */ ret = clk_enable(p->clk); if (ret) { dev_err(&p->pdev->dev, "cannot enable clock\n"); - return ret; + goto err0; } /* make sure channel is disabled */ @@ -174,9 +175,38 @@ static int sh_cmt_enable(struct sh_cmt_priv *p, unsigned long *rate) sh_cmt_write(p, CMCOR, 0xffffffff); sh_cmt_write(p, CMCNT, 0); + /* + * According to the sh73a0 user's manual, as CMCNT can be operated + * only by the RCLK (Pseudo 32 KHz), there's one restriction on + * modifying CMCNT register; two RCLK cycles are necessary before + * this register is either read or any modification of the value + * it holds is reflected in the LSI's actual operation. + * + * While at it, we're supposed to clear out the CMCNT as of this + * moment, so make sure it's processed properly here. This will + * take RCLKx2 at maximum. + */ + for (k = 0; k < 100; k++) { + if (!sh_cmt_read(p, CMCNT)) + break; + udelay(1); + } + + if (sh_cmt_read(p, CMCNT)) { + dev_err(&p->pdev->dev, "cannot clear CMCNT\n"); + ret = -ETIMEDOUT; + goto err1; + } + /* enable channel */ sh_cmt_start_stop_ch(p, 1); return 0; + err1: + /* stop clock */ + clk_disable(p->clk); + + err0: + return ret; } static void sh_cmt_disable(struct sh_cmt_priv *p) -- cgit v1.1 From b4300b72cfc01ea75b8aaede574bdfb04545d691 Mon Sep 17 00:00:00 2001 From: David Engraf Date: Wed, 20 Jul 2011 13:03:39 +0000 Subject: shwdt: fix usage of mod_timer This patch fixes the usage of mod_timer and makes the driver usable. mod_timer must be called with an absolute timeout in jiffies, the old implementation used a relative timeout thus the hardware watchdog was never triggered. Signed-off-by: David Engraf Signed-off-by: Paul Mundt --- drivers/watchdog/shwdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/shwdt.c b/drivers/watchdog/shwdt.c index db84f23..a267dc0 100644 --- a/drivers/watchdog/shwdt.c +++ b/drivers/watchdog/shwdt.c @@ -64,7 +64,7 @@ * misses its deadline, the kernel timer will allow the WDT to overflow. */ static int clock_division_ratio = WTCSR_CKS_4096; -#define next_ping_period(cks) msecs_to_jiffies(cks - 4) +#define next_ping_period(cks) (jiffies + msecs_to_jiffies(cks - 4)) static const struct watchdog_info sh_wdt_info; static struct platform_device *sh_wdt_dev; -- cgit v1.1 From 9a14a92c939aea1aaf27f5ad37b26b235acc2a65 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 15 Jul 2011 10:58:55 +0000 Subject: sh: intc: enable both edges GPIO interrupts on sh7372 IRQ-capable GPIOs on sh7372 can be configured to produce interrupts on both edges. Signed-off-by: Guennadi Liakhovetski Acked-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/sh/intc/chip.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/sh/intc/chip.c b/drivers/sh/intc/chip.c index f33e2dd..33b2ed4 100644 --- a/drivers/sh/intc/chip.c +++ b/drivers/sh/intc/chip.c @@ -186,6 +186,9 @@ static unsigned char intc_irq_sense_table[IRQ_TYPE_SENSE_MASK + 1] = { !defined(CONFIG_CPU_SUBTYPE_SH7709) [IRQ_TYPE_LEVEL_HIGH] = VALID(3), #endif +#if defined(CONFIG_ARCH_SH7372) + [IRQ_TYPE_EDGE_BOTH] = VALID(4), +#endif }; static int intc_set_type(struct irq_data *data, unsigned int type) -- cgit v1.1 From c5ad48f3117c4aae379b536f3270fc1efae945c0 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 20 Jun 2011 23:00:12 +0000 Subject: ARM: mach-shmobile: ag5evm: SDHI requires waiting for idle The SDHI block on the ag5evm requires waiting for idle before writing to some registers. Cc: Guennadi Liakhovetski Cc: Magnus Damm Signed-off-by: Simon Horman Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-ag5evm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-shmobile/board-ag5evm.c b/arch/arm/mach-shmobile/board-ag5evm.c index ce5c251..cdfdd62 100644 --- a/arch/arm/mach-shmobile/board-ag5evm.c +++ b/arch/arm/mach-shmobile/board-ag5evm.c @@ -341,6 +341,7 @@ static struct platform_device mipidsi0_device = { static struct sh_mobile_sdhi_info sdhi0_info = { .dma_slave_tx = SHDMA_SLAVE_SDHI0_TX, .dma_slave_rx = SHDMA_SLAVE_SDHI0_RX, + .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT, .tmio_caps = MMC_CAP_SD_HIGHSPEED, .tmio_ocr_mask = MMC_VDD_27_28 | MMC_VDD_28_29, }; @@ -382,7 +383,7 @@ void ag5evm_sdhi1_set_pwr(struct platform_device *pdev, int state) } static struct sh_mobile_sdhi_info sh_sdhi1_info = { - .tmio_flags = TMIO_MMC_WRPROTECT_DISABLE, + .tmio_flags = TMIO_MMC_WRPROTECT_DISABLE | TMIO_MMC_HAS_IDLE_WAIT, .tmio_caps = MMC_CAP_NONREMOVABLE | MMC_CAP_SDIO_IRQ, .tmio_ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, .set_pwr = ag5evm_sdhi1_set_pwr, -- cgit v1.1 From 00fe1ae91e0d69e52e8212d23cd3ecc74a7259a0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Jul 2011 16:24:46 +0200 Subject: netfilter: xt_rateest: fix xt_rateest_mt_checkentry() commit 4a5a5c73b7cfee (slightly better error reporting) added some useless code in xt_rateest_mt_checkentry(). Fix this so that different error codes can really be returned. Signed-off-by: Eric Dumazet CC: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/xt_rateest.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 76a0831..ed0db15 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -78,7 +78,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_rateest_match_info *info = par->matchinfo; struct xt_rateest *est1, *est2; - int ret = false; + int ret = -EINVAL; if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS | XT_RATEEST_MATCH_REL)) != 1) @@ -101,13 +101,12 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) if (!est1) goto err1; + est2 = NULL; if (info->flags & XT_RATEEST_MATCH_REL) { est2 = xt_rateest_lookup(info->name2); if (!est2) goto err2; - } else - est2 = NULL; - + } info->est1 = est1; info->est2 = est2; @@ -116,7 +115,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) err2: xt_rateest_put(est1); err1: - return -EINVAL; + return ret; } static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par) -- cgit v1.1 From 91c66c6893a3e2bb8a88a30cb76007d5d49d32c9 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Fri, 29 Jul 2011 16:38:49 +0200 Subject: netfilter: ip_queue: Fix small leak in ipq_build_packet_message() ipq_build_packet_message() in net/ipv4/netfilter/ip_queue.c and net/ipv6/netfilter/ip6_queue.c contain a small potential mem leak as far as I can tell. We allocate memory for 'skb' with alloc_skb() annd then call nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); NLMSG_PUT is a macro NLMSG_PUT(skb, pid, seq, type, len) \ NLMSG_NEW(skb, pid, seq, type, len, 0) that expands to NLMSG_NEW, which is also a macro which expands to: NLMSG_NEW(skb, pid, seq, type, len, flags) \ ({ if (unlikely(skb_tailroom(skb) < (int)NLMSG_SPACE(len))) \ goto nlmsg_failure; \ __nlmsg_put(skb, pid, seq, type, len, flags); }) If we take the true branch of the 'if' statement and 'goto nlmsg_failure', then we'll, at that point, return from ipq_build_packet_message() without having assigned 'skb' to anything and we'll leak the memory we allocated for it when it goes out of scope. Fix this by placing a 'kfree(skb)' at 'nlmsg_failure'. I admit that I do not know how likely this to actually happen or even if there's something that guarantees that it will never happen - I'm not that familiar with this code, but if that is so, I've not been able to spot it. Signed-off-by: Jesper Juhl Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_queue.c | 1 + net/ipv6/netfilter/ip6_queue.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 5c9b9d9..48f7d5b 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) return skb; nlmsg_failure: + kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip_queue: error creating packet message\n"); return NULL; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 2493948..87b243a 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) return skb; nlmsg_failure: + kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip6_queue: error creating packet message\n"); return NULL; -- cgit v1.1 From 9823d9ff483af4ce8804a9eb69600ca739cd1f58 Mon Sep 17 00:00:00 2001 From: Bart De Schuymer Date: Fri, 29 Jul 2011 16:40:30 +0200 Subject: netfilter: ebtables: fix ebtables build dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The configuration of ebtables shouldn't depend on CONFIG_BRIDGE_NETFILTER, only on CONFIG_NETFILTER. Reported-by: Sbastien Laveze Signed-off-by: Bart De Schuymer Signed-off-by: Patrick McHardy --- net/bridge/netfilter/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index ba6f73e..a9aff9c 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -4,7 +4,7 @@ menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" - depends on BRIDGE && BRIDGE_NETFILTER + depends on BRIDGE && NETFILTER select NETFILTER_XTABLES help ebtables is a general, extensible frame/packet identification -- cgit v1.1 From aa387cc895672b00f807ad7c734a2defaf677712 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 31 Jul 2011 22:05:09 +0200 Subject: block: add bsg helper library This moves the FC classes bsg code to the block layer and makes it a lib so that other classes like iscsi and SAS can use it. It is helpful because working with the request queue, bios, creating scatterlists, etc are a pain that the LLD does not have to worry about with normal IOs and should not have to worry about for bsg requests. Signed-off-by: Mike Christie Signed-off-by: Jens Axboe --- block/Kconfig | 10 ++ block/Makefile | 1 + block/bsg-lib.c | 297 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 4 + include/linux/bsg-lib.h | 73 ++++++++++++ 5 files changed, 385 insertions(+) create mode 100644 block/bsg-lib.c create mode 100644 include/linux/bsg-lib.h diff --git a/block/Kconfig b/block/Kconfig index 60be1e0..e97934e 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -65,6 +65,16 @@ config BLK_DEV_BSG If unsure, say Y. +config BLK_DEV_BSGLIB + bool "Block layer SG support v4 helper lib" + default n + select BLK_DEV_BSG + help + Subsystems will normally enable this if needed. Users will not + normally need to manually enable this. + + If unsure, say N. + config BLK_DEV_INTEGRITY bool "Block layer data integrity support" ---help--- diff --git a/block/Makefile b/block/Makefile index 0fec4b3..514c6e4 100644 --- a/block/Makefile +++ b/block/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o +obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o diff --git a/block/bsg-lib.c b/block/bsg-lib.c new file mode 100644 index 0000000..f8c0a61 --- /dev/null +++ b/block/bsg-lib.c @@ -0,0 +1,297 @@ +/* + * BSG helper library + * + * Copyright (C) 2008 James Smart, Emulex Corporation + * Copyright (C) 2011 Red Hat, Inc. All rights reserved. + * Copyright (C) 2011 Mike Christie + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include +#include +#include +#include +#include + +/** + * bsg_destroy_job - routine to teardown/delete a bsg job + * @job: bsg_job that is to be torn down + */ +static void bsg_destroy_job(struct bsg_job *job) +{ + put_device(job->dev); /* release reference for the request */ + + kfree(job->request_payload.sg_list); + kfree(job->reply_payload.sg_list); + kfree(job); +} + +/** + * bsg_job_done - completion routine for bsg requests + * @job: bsg_job that is complete + * @result: job reply result + * @reply_payload_rcv_len: length of payload recvd + * + * The LLD should call this when the bsg job has completed. + */ +void bsg_job_done(struct bsg_job *job, int result, + unsigned int reply_payload_rcv_len) +{ + struct request *req = job->req; + struct request *rsp = req->next_rq; + int err; + + err = job->req->errors = result; + if (err < 0) + /* we're only returning the result field in the reply */ + job->req->sense_len = sizeof(u32); + else + job->req->sense_len = job->reply_len; + /* we assume all request payload was transferred, residual == 0 */ + req->resid_len = 0; + + if (rsp) { + WARN_ON(reply_payload_rcv_len > rsp->resid_len); + + /* set reply (bidi) residual */ + rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len); + } + blk_complete_request(req); +} +EXPORT_SYMBOL_GPL(bsg_job_done); + +/** + * bsg_softirq_done - softirq done routine for destroying the bsg requests + * @rq: BSG request that holds the job to be destroyed + */ +static void bsg_softirq_done(struct request *rq) +{ + struct bsg_job *job = rq->special; + + blk_end_request_all(rq, rq->errors); + bsg_destroy_job(job); +} + +static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req) +{ + size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments); + + BUG_ON(!req->nr_phys_segments); + + buf->sg_list = kzalloc(sz, GFP_KERNEL); + if (!buf->sg_list) + return -ENOMEM; + sg_init_table(buf->sg_list, req->nr_phys_segments); + buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list); + buf->payload_len = blk_rq_bytes(req); + return 0; +} + +/** + * bsg_create_job - create the bsg_job structure for the bsg request + * @dev: device that is being sent the bsg request + * @req: BSG request that needs a job structure + */ +static int bsg_create_job(struct device *dev, struct request *req) +{ + struct request *rsp = req->next_rq; + struct request_queue *q = req->q; + struct bsg_job *job; + int ret; + + BUG_ON(req->special); + + job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL); + if (!job) + return -ENOMEM; + + req->special = job; + job->req = req; + if (q->bsg_job_size) + job->dd_data = (void *)&job[1]; + job->request = req->cmd; + job->request_len = req->cmd_len; + job->reply = req->sense; + job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer + * allocated */ + if (req->bio) { + ret = bsg_map_buffer(&job->request_payload, req); + if (ret) + goto failjob_rls_job; + } + if (rsp && rsp->bio) { + ret = bsg_map_buffer(&job->reply_payload, rsp); + if (ret) + goto failjob_rls_rqst_payload; + } + job->dev = dev; + /* take a reference for the request */ + get_device(job->dev); + return 0; + +failjob_rls_rqst_payload: + kfree(job->request_payload.sg_list); +failjob_rls_job: + kfree(job); + return -ENOMEM; +} + +/* + * bsg_goose_queue - restart queue in case it was stopped + * @q: request q to be restarted + */ +void bsg_goose_queue(struct request_queue *q) +{ + if (!q) + return; + + blk_run_queue_async(q); +} +EXPORT_SYMBOL_GPL(bsg_goose_queue); + +/** + * bsg_request_fn - generic handler for bsg requests + * @q: request queue to manage + * + * On error the create_bsg_job function should return a -Exyz error value + * that will be set to the req->errors. + * + * Drivers/subsys should pass this to the queue init function. + */ +void bsg_request_fn(struct request_queue *q) +{ + struct device *dev = q->queuedata; + struct request *req; + struct bsg_job *job; + int ret; + + if (!get_device(dev)) + return; + + while (1) { + req = blk_fetch_request(q); + if (!req) + break; + spin_unlock_irq(q->queue_lock); + + ret = bsg_create_job(dev, req); + if (ret) { + req->errors = ret; + blk_end_request_all(req, ret); + spin_lock_irq(q->queue_lock); + continue; + } + + job = req->special; + ret = q->bsg_job_fn(job); + spin_lock_irq(q->queue_lock); + if (ret) + break; + } + + spin_unlock_irq(q->queue_lock); + put_device(dev); + spin_lock_irq(q->queue_lock); +} +EXPORT_SYMBOL_GPL(bsg_request_fn); + +/** + * bsg_setup_queue - Create and add the bsg hooks so we can receive requests + * @dev: device to attach bsg device to + * @q: request queue setup by caller + * @name: device to give bsg device + * @job_fn: bsg job handler + * @dd_job_size: size of LLD data needed for each job + * + * The caller should have setup the reuqest queue with bsg_request_fn + * as the request_fn. + */ +int bsg_setup_queue(struct device *dev, struct request_queue *q, + char *name, bsg_job_fn *job_fn, int dd_job_size) +{ + int ret; + + q->queuedata = dev; + q->bsg_job_size = dd_job_size; + q->bsg_job_fn = job_fn; + queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); + blk_queue_softirq_done(q, bsg_softirq_done); + blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); + + ret = bsg_register_queue(q, dev, name, NULL); + if (ret) { + printk(KERN_ERR "%s: bsg interface failed to " + "initialize - register queue\n", dev->kobj.name); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(bsg_setup_queue); + +/** + * bsg_remove_queue - Deletes the bsg dev from the q + * @q: the request_queue that is to be torn down. + * + * Notes: + * Before unregistering the queue empty any requests that are blocked + */ +void bsg_remove_queue(struct request_queue *q) +{ + struct request *req; /* block request */ + int counts; /* totals for request_list count and starved */ + + if (!q) + return; + + /* Stop taking in new requests */ + spin_lock_irq(q->queue_lock); + blk_stop_queue(q); + + /* drain all requests in the queue */ + while (1) { + /* need the lock to fetch a request + * this may fetch the same reqeust as the previous pass + */ + req = blk_fetch_request(q); + /* save requests in use and starved */ + counts = q->rq.count[0] + q->rq.count[1] + + q->rq.starved[0] + q->rq.starved[1]; + spin_unlock_irq(q->queue_lock); + /* any requests still outstanding? */ + if (counts == 0) + break; + + /* This may be the same req as the previous iteration, + * always send the blk_end_request_all after a prefetch. + * It is not okay to not end the request because the + * prefetch started the request. + */ + if (req) { + /* return -ENXIO to indicate that this queue is + * going away + */ + req->errors = -ENXIO; + blk_end_request_all(req, -ENXIO); + } + + msleep(200); /* allow bsg to possibly finish */ + spin_lock_irq(q->queue_lock); + } + bsg_unregister_queue(q); +} +EXPORT_SYMBOL_GPL(bsg_remove_queue); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0e67c45..8479285 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -30,6 +30,7 @@ struct request_pm_state; struct blk_trace; struct request; struct sg_io_hdr; +struct bsg_job; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -209,6 +210,7 @@ typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); typedef int (lld_busy_fn) (struct request_queue *q); +typedef int (bsg_job_fn) (struct bsg_job *); enum blk_eh_timer_return { BLK_EH_NOT_HANDLED, @@ -375,6 +377,8 @@ struct request_queue { struct mutex sysfs_lock; #if defined(CONFIG_BLK_DEV_BSG) + bsg_job_fn *bsg_job_fn; + int bsg_job_size; struct bsg_class_device bsg_dev; #endif diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h new file mode 100644 index 0000000..f55ab8c --- /dev/null +++ b/include/linux/bsg-lib.h @@ -0,0 +1,73 @@ +/* + * BSG helper library + * + * Copyright (C) 2008 James Smart, Emulex Corporation + * Copyright (C) 2011 Red Hat, Inc. All rights reserved. + * Copyright (C) 2011 Mike Christie + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef _BLK_BSG_ +#define _BLK_BSG_ + +#include + +struct request; +struct device; +struct scatterlist; +struct request_queue; + +struct bsg_buffer { + unsigned int payload_len; + int sg_cnt; + struct scatterlist *sg_list; +}; + +struct bsg_job { + struct device *dev; + struct request *req; + + /* Transport/driver specific request/reply structs */ + void *request; + void *reply; + + unsigned int request_len; + unsigned int reply_len; + /* + * On entry : reply_len indicates the buffer size allocated for + * the reply. + * + * Upon completion : the message handler must set reply_len + * to indicates the size of the reply to be returned to the + * caller. + */ + + /* DMA payloads for the request/response */ + struct bsg_buffer request_payload; + struct bsg_buffer reply_payload; + + void *dd_data; /* Used for driver-specific storage */ +}; + +void bsg_job_done(struct bsg_job *job, int result, + unsigned int reply_payload_rcv_len); +int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name, + bsg_job_fn *job_fn, int dd_job_size); +void bsg_request_fn(struct request_queue *q); +void bsg_remove_queue(struct request_queue *q); +void bsg_goose_queue(struct request_queue *q); + +#endif -- cgit v1.1 From 34dd82afd27da2537199d7f71f1542501c6f96e7 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sun, 31 Jul 2011 22:08:04 +0200 Subject: loop: replace linked list of allocated devices with an idr index Replace the linked list, that keeps track of allocated devices, with an idr index to allow a more efficient lookup of devices. Cc: Tejun Heo Signed-off-by: Kay Sievers Signed-off-by: Jens Axboe --- drivers/block/loop.c | 152 +++++++++++++++++++++++++++------------------------ include/linux/loop.h | 1 - 2 files changed, 80 insertions(+), 73 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 76c8da7..f58532e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -78,8 +78,8 @@ #include -static LIST_HEAD(loop_devices); -static DEFINE_MUTEX(loop_devices_mutex); +static DEFINE_IDR(loop_index_idr); +static DEFINE_MUTEX(loop_index_mutex); static int max_part; static int part_shift; @@ -722,17 +722,10 @@ static inline int is_loop_device(struct file *file) static ssize_t loop_attr_show(struct device *dev, char *page, ssize_t (*callback)(struct loop_device *, char *)) { - struct loop_device *l, *lo = NULL; - - mutex_lock(&loop_devices_mutex); - list_for_each_entry(l, &loop_devices, lo_list) - if (disk_to_dev(l->lo_disk) == dev) { - lo = l; - break; - } - mutex_unlock(&loop_devices_mutex); + struct gendisk *disk = dev_to_disk(dev); + struct loop_device *lo = disk->private_data; - return lo ? callback(lo, page) : -EIO; + return callback(lo, page); } #define LOOP_ATTR_RO(_name) \ @@ -1557,40 +1550,64 @@ int loop_register_transfer(struct loop_func_table *funcs) return 0; } +static int unregister_transfer_cb(int id, void *ptr, void *data) +{ + struct loop_device *lo = ptr; + struct loop_func_table *xfer = data; + + mutex_lock(&lo->lo_ctl_mutex); + if (lo->lo_encryption == xfer) + loop_release_xfer(lo); + mutex_unlock(&lo->lo_ctl_mutex); + return 0; +} + int loop_unregister_transfer(int number) { unsigned int n = number; - struct loop_device *lo; struct loop_func_table *xfer; if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL) return -EINVAL; xfer_funcs[n] = NULL; - - list_for_each_entry(lo, &loop_devices, lo_list) { - mutex_lock(&lo->lo_ctl_mutex); - - if (lo->lo_encryption == xfer) - loop_release_xfer(lo); - - mutex_unlock(&lo->lo_ctl_mutex); - } - + idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer); return 0; } EXPORT_SYMBOL(loop_register_transfer); EXPORT_SYMBOL(loop_unregister_transfer); -static struct loop_device *loop_alloc(int i) +static int loop_add(struct loop_device **l, int i) { struct loop_device *lo; struct gendisk *disk; + int err; lo = kzalloc(sizeof(*lo), GFP_KERNEL); - if (!lo) + if (!lo) { + err = -ENOMEM; goto out; + } + + err = idr_pre_get(&loop_index_idr, GFP_KERNEL); + if (err < 0) + goto out_free_dev; + + if (i >= 0) { + int m; + + /* create specific i in the index */ + err = idr_get_new_above(&loop_index_idr, lo, i, &m); + if (err >= 0 && i != m) { + idr_remove(&loop_index_idr, m); + err = -EEXIST; + } + } else { + err = -EINVAL; + } + if (err < 0) + goto out_free_dev; lo->lo_queue = blk_alloc_queue(GFP_KERNEL); if (!lo->lo_queue) @@ -1611,56 +1628,54 @@ static struct loop_device *loop_alloc(int i) disk->private_data = lo; disk->queue = lo->lo_queue; sprintf(disk->disk_name, "loop%d", i); - return lo; + add_disk(disk); + *l = lo; + return lo->lo_number; out_free_queue: blk_cleanup_queue(lo->lo_queue); out_free_dev: kfree(lo); out: - return NULL; + return err; } -static void loop_free(struct loop_device *lo) +static void loop_remove(struct loop_device *lo) { + del_gendisk(lo->lo_disk); blk_cleanup_queue(lo->lo_queue); put_disk(lo->lo_disk); - list_del(&lo->lo_list); kfree(lo); } -static struct loop_device *loop_init_one(int i) +static int loop_lookup(struct loop_device **l, int i) { struct loop_device *lo; + int ret = -ENODEV; - list_for_each_entry(lo, &loop_devices, lo_list) { - if (lo->lo_number == i) - return lo; - } - - lo = loop_alloc(i); + lo = idr_find(&loop_index_idr, i); if (lo) { - add_disk(lo->lo_disk); - list_add_tail(&lo->lo_list, &loop_devices); + *l = lo; + ret = lo->lo_number; } - return lo; -} - -static void loop_del_one(struct loop_device *lo) -{ - del_gendisk(lo->lo_disk); - loop_free(lo); + return ret; } static struct kobject *loop_probe(dev_t dev, int *part, void *data) { struct loop_device *lo; struct kobject *kobj; + int err; - mutex_lock(&loop_devices_mutex); - lo = loop_init_one(MINOR(dev) >> part_shift); - kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM); - mutex_unlock(&loop_devices_mutex); + mutex_lock(&loop_index_mutex); + err = loop_lookup(&lo, MINOR(dev) >> part_shift); + if (err < 0) + err = loop_add(&lo, MINOR(dev) >> part_shift); + if (err < 0) + kobj = ERR_PTR(err); + else + kobj = get_disk(lo->lo_disk); + mutex_unlock(&loop_index_mutex); *part = 0; return kobj; @@ -1670,7 +1685,7 @@ static int __init loop_init(void) { int i, nr; unsigned long range; - struct loop_device *lo, *next; + struct loop_device *lo; /* * loop module now has a feature to instantiate underlying device @@ -1719,43 +1734,36 @@ static int __init loop_init(void) if (register_blkdev(LOOP_MAJOR, "loop")) return -EIO; - for (i = 0; i < nr; i++) { - lo = loop_alloc(i); - if (!lo) - goto Enomem; - list_add_tail(&lo->lo_list, &loop_devices); - } - - /* point of no return */ - - list_for_each_entry(lo, &loop_devices, lo_list) - add_disk(lo->lo_disk); - blk_register_region(MKDEV(LOOP_MAJOR, 0), range, THIS_MODULE, loop_probe, NULL, NULL); + /* pre-create number devices of devices given by config or max_loop */ + mutex_lock(&loop_index_mutex); + for (i = 0; i < nr; i++) + loop_add(&lo, i); + mutex_unlock(&loop_index_mutex); + printk(KERN_INFO "loop: module loaded\n"); return 0; +} -Enomem: - printk(KERN_INFO "loop: out of memory\n"); - - list_for_each_entry_safe(lo, next, &loop_devices, lo_list) - loop_free(lo); +static int loop_exit_cb(int id, void *ptr, void *data) +{ + struct loop_device *lo = ptr; - unregister_blkdev(LOOP_MAJOR, "loop"); - return -ENOMEM; + loop_remove(lo); + return 0; } static void __exit loop_exit(void) { unsigned long range; - struct loop_device *lo, *next; range = max_loop ? max_loop << part_shift : 1UL << MINORBITS; - list_for_each_entry_safe(lo, next, &loop_devices, lo_list) - loop_del_one(lo); + idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); + idr_remove_all(&loop_index_idr); + idr_destroy(&loop_index_idr); blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); unregister_blkdev(LOOP_MAJOR, "loop"); diff --git a/include/linux/loop.h b/include/linux/loop.h index 66c194e..5f08d18 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -64,7 +64,6 @@ struct loop_device { struct request_queue *lo_queue; struct gendisk *lo_disk; - struct list_head lo_list; }; #endif /* __KERNEL__ */ -- cgit v1.1 From 770fe30a46a12b6fb6b63fbe1737654d28e84844 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sun, 31 Jul 2011 22:08:04 +0200 Subject: loop: add management interface for on-demand device allocation Loop devices today have a fixed pre-allocated number of usually 8. The number can only be changed at module init time. To find a free device to use, /dev/loop%i needs to be scanned, and all devices need to be opened until a free one is possibly found. This adds a new /dev/loop-control device node, that allows to dynamically find or allocate a free device, and to add and remove loop devices from the running system: LOOP_CTL_ADD adds a specific device. Arg is the number of the device. It returns the device i or a negative error code. LOOP_CTL_REMOVE removes a specific device, Arg is the number the device. It returns the device i or a negative error code. LOOP_CTL_GET_FREE finds the next unbound device or allocates a new one. No arg is given. It returns the device i or a negative error code. The loop kernel module gets automatically loaded when /dev/loop-control is accessed the first time. The alias specified in the module, instructs udev to create this 'dead' device node, even when the module is not loaded. Example: cfd = open("/dev/loop-control", O_RDWR); # add a new specific loop device err = ioctl(cfd, LOOP_CTL_ADD, devnr); # remove a specific loop device err = ioctl(cfd, LOOP_CTL_REMOVE, devnr); # find or allocate a free loop device to use devnr = ioctl(cfd, LOOP_CTL_GET_FREE); sprintf(loopname, "/dev/loop%i", devnr); ffd = open("backing-file", O_RDWR); lfd = open(loopname, O_RDWR); err = ioctl(lfd, LOOP_SET_FD, ffd); Cc: Tejun Heo Cc: Karel Zak Signed-off-by: Kay Sievers Signed-off-by: Jens Axboe --- drivers/block/loop.c | 120 +++++++++++++++++++++++++++++++++++++++++++-- include/linux/loop.h | 4 ++ include/linux/miscdevice.h | 1 + 3 files changed, 121 insertions(+), 4 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f58532e..5c9edf9 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -75,7 +75,7 @@ #include #include #include - +#include #include static DEFINE_IDR(loop_index_idr); @@ -1478,13 +1478,22 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, static int lo_open(struct block_device *bdev, fmode_t mode) { - struct loop_device *lo = bdev->bd_disk->private_data; + struct loop_device *lo; + int err = 0; + + mutex_lock(&loop_index_mutex); + lo = bdev->bd_disk->private_data; + if (!lo) { + err = -ENXIO; + goto out; + } mutex_lock(&lo->lo_ctl_mutex); lo->lo_refcnt++; mutex_unlock(&lo->lo_ctl_mutex); - - return 0; +out: + mutex_unlock(&loop_index_mutex); + return err; } static int lo_release(struct gendisk *disk, fmode_t mode) @@ -1603,6 +1612,13 @@ static int loop_add(struct loop_device **l, int i) idr_remove(&loop_index_idr, m); err = -EEXIST; } + } else if (i == -1) { + int m; + + /* get next free nr */ + err = idr_get_new(&loop_index_idr, lo, &m); + if (err >= 0) + i = m; } else { err = -EINVAL; } @@ -1648,16 +1664,41 @@ static void loop_remove(struct loop_device *lo) kfree(lo); } +static int find_free_cb(int id, void *ptr, void *data) +{ + struct loop_device *lo = ptr; + struct loop_device **l = data; + + if (lo->lo_state == Lo_unbound) { + *l = lo; + return 1; + } + return 0; +} + static int loop_lookup(struct loop_device **l, int i) { struct loop_device *lo; int ret = -ENODEV; + if (i < 0) { + int err; + + err = idr_for_each(&loop_index_idr, &find_free_cb, &lo); + if (err == 1) { + *l = lo; + ret = lo->lo_number; + } + goto out; + } + + /* lookup and return a specific i */ lo = idr_find(&loop_index_idr, i); if (lo) { *l = lo; ret = lo->lo_number; } +out: return ret; } @@ -1681,11 +1722,76 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data) return kobj; } +static long loop_control_ioctl(struct file *file, unsigned int cmd, + unsigned long parm) +{ + struct loop_device *lo; + int ret = -ENOSYS; + + mutex_lock(&loop_index_mutex); + switch (cmd) { + case LOOP_CTL_ADD: + ret = loop_lookup(&lo, parm); + if (ret >= 0) { + ret = -EEXIST; + break; + } + ret = loop_add(&lo, parm); + break; + case LOOP_CTL_REMOVE: + ret = loop_lookup(&lo, parm); + if (ret < 0) + break; + mutex_lock(&lo->lo_ctl_mutex); + if (lo->lo_state != Lo_unbound) { + ret = -EBUSY; + mutex_unlock(&lo->lo_ctl_mutex); + break; + } + if (lo->lo_refcnt > 0) { + ret = -EBUSY; + mutex_unlock(&lo->lo_ctl_mutex); + break; + } + lo->lo_disk->private_data = NULL; + mutex_unlock(&lo->lo_ctl_mutex); + idr_remove(&loop_index_idr, lo->lo_number); + loop_remove(lo); + break; + case LOOP_CTL_GET_FREE: + ret = loop_lookup(&lo, -1); + if (ret >= 0) + break; + ret = loop_add(&lo, -1); + } + mutex_unlock(&loop_index_mutex); + + return ret; +} + +static const struct file_operations loop_ctl_fops = { + .open = nonseekable_open, + .unlocked_ioctl = loop_control_ioctl, + .compat_ioctl = loop_control_ioctl, + .owner = THIS_MODULE, + .llseek = noop_llseek, +}; + +static struct miscdevice loop_misc = { + .minor = LOOP_CTRL_MINOR, + .name = "loop-control", + .fops = &loop_ctl_fops, +}; + +MODULE_ALIAS_MISCDEV(LOOP_CTRL_MINOR); +MODULE_ALIAS("devname:loop-control"); + static int __init loop_init(void) { int i, nr; unsigned long range; struct loop_device *lo; + int err; /* * loop module now has a feature to instantiate underlying device @@ -1702,6 +1808,10 @@ static int __init loop_init(void) * device on-demand. */ + err = misc_register(&loop_misc); + if (err < 0) + return err; + part_shift = 0; if (max_part > 0) { part_shift = fls(max_part); @@ -1767,6 +1877,8 @@ static void __exit loop_exit(void) blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); unregister_blkdev(LOOP_MAJOR, "loop"); + + misc_deregister(&loop_misc); } module_init(loop_init); diff --git a/include/linux/loop.h b/include/linux/loop.h index 5f08d18..683d698 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -160,4 +160,8 @@ int loop_unregister_transfer(int number); #define LOOP_CHANGE_FD 0x4C06 #define LOOP_SET_CAPACITY 0x4C07 +/* /dev/loop-control interface */ +#define LOOP_CTL_ADD 0x4C80 +#define LOOP_CTL_REMOVE 0x4C81 +#define LOOP_CTL_GET_FREE 0x4C82 #endif diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 18fd130..c309b1e 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -40,6 +40,7 @@ #define BTRFS_MINOR 234 #define AUTOFS_MINOR 235 #define MAPPER_CTRL_MINOR 236 +#define LOOP_CTRL_MINOR 237 #define MISC_DYNAMIC_MINOR 255 struct device; -- cgit v1.1 From d134b00b9acca3fb054d7c88a5f5d562ecbb42d1 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sun, 31 Jul 2011 22:08:04 +0200 Subject: loop: add BLK_DEV_LOOP_MIN_COUNT=%i to allow distros 0 pre-allocated loop devices Instead of unconditionally creating a fixed number of dead loop devices which need to be investigated by storage handling services, even when they are never used, we allow distros start with 0 loop devices and have losetup(8) and similar switch to the dynamic /dev/loop-control interface instead of searching /dev/loop%i for free devices. Signed-off-by: Kay Sievers Signed-off-by: Jens Axboe --- Documentation/kernel-parameters.txt | 9 ++++++--- drivers/block/Kconfig | 15 +++++++++++++++ drivers/block/loop.c | 27 ++++++++++----------------- 3 files changed, 31 insertions(+), 20 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4ca9389..c328511 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1340,9 +1340,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. it is equivalent to "nosmp", which also disables the IO APIC. - max_loop= [LOOP] Maximum number of loopback devices that can - be mounted - Format: <1-256> + max_loop= [LOOP] The number of loop block devices that get + (loop.max_loop) unconditionally pre-created at init time. The default + number is configured by BLK_DEV_LOOP_MIN_COUNT. Instead + of statically allocating a predefined number, loop + devices can be requested on-demand with the + /dev/loop-control interface. mcatest= [IA-64] diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 717d6e4..57212c5 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -256,6 +256,21 @@ config BLK_DEV_LOOP Most users will answer N here. +config BLK_DEV_LOOP_MIN_COUNT + int "Number of loop devices to pre-create at init time" + depends on BLK_DEV_LOOP + default 8 + help + Static number of loop devices to be unconditionally pre-created + at init time. + + This default value can be overwritten on the kernel command + line or with module-parameter loop.max_loop. + + The historic default is 8. If a late 2011 version of losetup(8) + is used, it can be set to 0, since needed loop devices can be + dynamically allocated with the /dev/loop-control interface. + config BLK_DEV_CRYPTOLOOP tristate "Cryptoloop Support" select CRYPTO diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 5c9edf9..3defc52 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1793,21 +1793,6 @@ static int __init loop_init(void) struct loop_device *lo; int err; - /* - * loop module now has a feature to instantiate underlying device - * structure on-demand, provided that there is an access dev node. - * However, this will not work well with user space tool that doesn't - * know about such "feature". In order to not break any existing - * tool, we do the following: - * - * (1) if max_loop is specified, create that many upfront, and this - * also becomes a hard limit. - * (2) if max_loop is not specified, create 8 loop device on module - * load, user can further extend loop device by create dev node - * themselves and have kernel automatically instantiate actual - * device on-demand. - */ - err = misc_register(&loop_misc); if (err < 0) return err; @@ -1833,11 +1818,19 @@ static int __init loop_init(void) if (max_loop > 1UL << (MINORBITS - part_shift)) return -EINVAL; + /* + * If max_loop is specified, create that many devices upfront. + * This also becomes a hard limit. If max_loop is not specified, + * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module + * init time. Loop devices can be requested on-demand with the + * /dev/loop-control interface, or be instantiated by accessing + * a 'dead' device node. + */ if (max_loop) { nr = max_loop; range = max_loop << part_shift; } else { - nr = 8; + nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT; range = 1UL << MINORBITS; } @@ -1847,7 +1840,7 @@ static int __init loop_init(void) blk_register_region(MKDEV(LOOP_MAJOR, 0), range, THIS_MODULE, loop_probe, NULL, NULL); - /* pre-create number devices of devices given by config or max_loop */ + /* pre-create number of devices given by config or max_loop */ mutex_lock(&loop_index_mutex); for (i = 0; i < nr; i++) loop_add(&lo, i); -- cgit v1.1 From 05eb0f252b04aa94ace0794f73d56c6a02351d80 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sun, 31 Jul 2011 22:21:35 +0200 Subject: loop: fix deadlock when sysfs and LOOP_CLR_FD race against each other LOOP_CLR_FD takes lo->lo_ctl_mutex and tries to remove the loop sysfs files. Sysfs calls show() and waits for lo->lo_ctl_mutex. LOOP_CLR_FD waits for show() to finish to remove the sysfs file. cat /sys/class/block/loop0/loop/backing_file mutex_lock_nested+0x176/0x350 ? loop_attr_do_show_backing_file+0x2f/0xd0 [loop] ? loop_attr_do_show_backing_file+0x2f/0xd0 [loop] loop_attr_do_show_backing_file+0x2f/0xd0 [loop] dev_attr_show+0x1b/0x60 ? sysfs_read_file+0x86/0x1a0 ? __get_free_pages+0x12/0x50 sysfs_read_file+0xaf/0x1a0 ioctl(LOOP_CLR_FD): wait_for_common+0x12c/0x180 ? try_to_wake_up+0x2a0/0x2a0 wait_for_completion+0x18/0x20 sysfs_deactivate+0x178/0x180 ? sysfs_addrm_finish+0x43/0x70 ? sysfs_addrm_start+0x1d/0x20 sysfs_addrm_finish+0x43/0x70 sysfs_hash_and_remove+0x85/0xa0 sysfs_remove_group+0x59/0x100 loop_clr_fd+0x1dc/0x3f0 [loop] lo_ioctl+0x223/0x7a0 [loop] Instead of taking the lo_ctl_mutex from sysfs code, take the inner lo->lo_lock, to protect the access to the backing_file data. Thanks to Tejun for help debugging and finding a solution. Cc: Milan Broz Cc: Tejun Heo Signed-off-by: Kay Sievers Cc: stable@kernel.org Signed-off-by: Jens Axboe --- drivers/block/loop.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 3defc52..4720c7a 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -743,10 +743,10 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf) ssize_t ret; char *p = NULL; - mutex_lock(&lo->lo_ctl_mutex); + spin_lock_irq(&lo->lo_lock); if (lo->lo_backing_file) p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1); - mutex_unlock(&lo->lo_ctl_mutex); + spin_unlock_irq(&lo->lo_lock); if (IS_ERR_OR_NULL(p)) ret = PTR_ERR(p); @@ -1000,7 +1000,9 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) kthread_stop(lo->lo_thread); + spin_lock_irq(&lo->lo_lock); lo->lo_backing_file = NULL; + spin_unlock_irq(&lo->lo_lock); loop_release_xfer(lo); lo->transfer = NULL; -- cgit v1.1 From e5a94f56845bb4b272d82e84b5a1e2080b07ba82 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 1 Aug 2011 10:31:06 +0200 Subject: blk-throttle: correctly determine sync bio read request is always sync. Using rw_is_sync() to determine if a bio is sync. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-throttle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index f6a7941..a19f58c 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -746,7 +746,7 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg, static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) { bool rw = bio_data_dir(bio); - bool sync = bio->bi_rw & REQ_SYNC; + bool sync = rw_is_sync(bio->bi_rw); /* Charge the bio to the group */ tg->bytes_disp[rw] += bio->bi_size; @@ -1150,7 +1150,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) if (tg_no_rule_group(tg, rw)) { blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, - rw, bio->bi_rw & REQ_SYNC); + rw, rw_is_sync(bio->bi_rw)); rcu_read_unlock(); return 0; } -- cgit v1.1 From b03e7495a862b028294f59fc87286d6d78ee7fa1 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Wed, 20 Jul 2011 15:20:54 -0500 Subject: PCI: Set PCI-E Max Payload Size on fabric On a given PCI-E fabric, each device, bridge, and root port can have a different PCI-E maximum payload size. There is a sizable performance boost for having the largest possible maximum payload size on each PCI-E device. However, if improperly configured, fatal bus errors can occur. Thus, it is important to ensure that PCI-E payloads sends by a device are never larger than the MPS setting of all devices on the way to the destination. This can be achieved two ways: - A conservative approach is to use the smallest common denominator of the entire tree below a root complex for every device on that fabric. This means for example that having a 128 bytes MPS USB controller on one leg of a switch will dramatically reduce performances of a video card or 10GE adapter on another leg of that same switch. It also means that any hierarchy supporting hotplug slots (including expresscard or thunderbolt I suppose, dbl check that) will have to be entirely clamped to 128 bytes since we cannot predict what will be plugged into those slots, and we cannot change the MPS on a "live" system. - A more optimal way is possible, if it falls within a couple of constraints: * The top-level host bridge will never generate packets larger than the smallest TLP (or if it can be controlled independently from its MPS at least) * The device will never generate packets larger than MPS (which can be configured via MRRS) * No support of direct PCI-E <-> PCI-E transfers between devices without some additional code to specifically deal with that case Then we can use an approach that basically ignores downstream requests and focuses exclusively on upstream requests. In that case, all we need to care about is that a device MPS is no larger than its parent MPS, which allows us to keep all switches/bridges to the max MPS supported by their parent and eventually the PHB. In this case, your USB controller would no longer "starve" your 10GE Ethernet and your hotplug slots won't affect your global MPS. Additionally, the hotplugged devices themselves can be configured to a larger MPS up to the value configured in the hotplug bridge. To choose between the two available options, two PCI kernel boot args have been added to the PCI calls. "pcie_bus_safe" will provide the former behavior, while "pcie_bus_perf" will perform the latter behavior. By default, the latter behavior is used. NOTE: due to the location of the enablement, each arch will need to add calls to this function. This patch only enables x86. This patch includes a number of changes recommended by Benjamin Herrenschmidt. Tested-by: Jordan_Hargrave@dell.com Signed-off-by: Jon Mason Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 9 +++ drivers/pci/hotplug/pcihp_slot.c | 45 +----------- drivers/pci/pci.c | 67 ++++++++++++++++++ drivers/pci/probe.c | 145 +++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 15 +++- 5 files changed, 236 insertions(+), 45 deletions(-) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index ae3cb23..c953302 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -360,6 +360,15 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) } } + /* After the PCI-E bus has been walked and all devices discovered, + * configure any settings of the fabric that might be necessary. + */ + if (bus) { + struct pci_bus *child; + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child, child->self->pcie_mpss); + } + if (!bus) kfree(sd); diff --git a/drivers/pci/hotplug/pcihp_slot.c b/drivers/pci/hotplug/pcihp_slot.c index 749fdf0..753b21a 100644 --- a/drivers/pci/hotplug/pcihp_slot.c +++ b/drivers/pci/hotplug/pcihp_slot.c @@ -158,47 +158,6 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp) */ } -/* Program PCIE MaxPayload setting on device: ensure parent maxpayload <= device */ -static int pci_set_payload(struct pci_dev *dev) -{ - int pos, ppos; - u16 pctl, psz; - u16 dctl, dsz, dcap, dmax; - struct pci_dev *parent; - - parent = dev->bus->self; - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); - if (!pos) - return 0; - - /* Read Device MaxPayload capability and setting */ - pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &dctl); - pci_read_config_word(dev, pos + PCI_EXP_DEVCAP, &dcap); - dsz = (dctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; - dmax = (dcap & PCI_EXP_DEVCAP_PAYLOAD); - - /* Read Parent MaxPayload setting */ - ppos = pci_find_capability(parent, PCI_CAP_ID_EXP); - if (!ppos) - return 0; - pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl); - psz = (pctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; - - /* If parent payload > device max payload -> error - * If parent payload > device payload -> set speed - * If parent payload <= device payload -> do nothing - */ - if (psz > dmax) - return -1; - else if (psz > dsz) { - dev_info(&dev->dev, "Setting MaxPayload to %d\n", 128 << psz); - pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, - (dctl & ~PCI_EXP_DEVCTL_PAYLOAD) + - (psz << 5)); - } - return 0; -} - void pci_configure_slot(struct pci_dev *dev) { struct pci_dev *cdev; @@ -210,9 +169,7 @@ void pci_configure_slot(struct pci_dev *dev) (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI))) return; - ret = pci_set_payload(dev); - if (ret) - dev_warn(&dev->dev, "could not set device max payload\n"); + pcie_bus_configure_settings(dev->bus, dev->bus->self->pcie_mpss); memset(&hpp, 0, sizeof(hpp)); ret = pci_get_hp_params(dev, &hpp); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 08a95b3..466fad6 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -77,6 +77,8 @@ unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE; unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE; unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE; +enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_PERFORMANCE; + /* * The default CLS is used if arch didn't set CLS explicitly and not * all pci devices agree on the same value. Arch can override either @@ -3223,6 +3225,67 @@ out: EXPORT_SYMBOL(pcie_set_readrq); /** + * pcie_get_mps - get PCI Express maximum payload size + * @dev: PCI device to query + * + * Returns maximum payload size in bytes + * or appropriate error value. + */ +int pcie_get_mps(struct pci_dev *dev) +{ + int ret, cap; + u16 ctl; + + cap = pci_pcie_cap(dev); + if (!cap) + return -EINVAL; + + ret = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl); + if (!ret) + ret = 128 << ((ctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5); + + return ret; +} + +/** + * pcie_set_mps - set PCI Express maximum payload size + * @dev: PCI device to query + * @rq: maximum payload size in bytes + * valid values are 128, 256, 512, 1024, 2048, 4096 + * + * If possible sets maximum payload size + */ +int pcie_set_mps(struct pci_dev *dev, int mps) +{ + int cap, err = -EINVAL; + u16 ctl, v; + + if (mps < 128 || mps > 4096 || !is_power_of_2(mps)) + goto out; + + v = ffs(mps) - 8; + if (v > dev->pcie_mpss) + goto out; + v <<= 5; + + cap = pci_pcie_cap(dev); + if (!cap) + goto out; + + err = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl); + if (err) + goto out; + + if ((ctl & PCI_EXP_DEVCTL_PAYLOAD) != v) { + ctl &= ~PCI_EXP_DEVCTL_PAYLOAD; + ctl |= v; + err = pci_write_config_word(dev, cap + PCI_EXP_DEVCTL, ctl); + } +out: + return err; +} + +/** * pci_select_bars - Make BAR mask from the type of resource * @dev: the PCI device for which BAR mask is made * @flags: resource type mask to be selected @@ -3505,6 +3568,10 @@ static int __init pci_setup(char *str) pci_hotplug_io_size = memparse(str + 9, &str); } else if (!strncmp(str, "hpmemsize=", 10)) { pci_hotplug_mem_size = memparse(str + 10, &str); + } else if (!strncmp(str, "pcie_bus_safe", 13)) { + pcie_bus_config = PCIE_BUS_SAFE; + } else if (!strncmp(str, "pcie_bus_perf", 13)) { + pcie_bus_config = PCIE_BUS_PERFORMANCE; } else { printk(KERN_ERR "PCI: Unknown option `%s'\n", str); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 795c902..5becf7c 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -856,6 +856,8 @@ void set_pcie_port_type(struct pci_dev *pdev) pdev->pcie_cap = pos; pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; + pci_read_config_word(pdev, pos + PCI_EXP_DEVCAP, ®16); + pdev->pcie_mpss = reg16 & PCI_EXP_DEVCAP_PAYLOAD; } void set_pcie_hotplug_bridge(struct pci_dev *pdev) @@ -1326,6 +1328,149 @@ int pci_scan_slot(struct pci_bus *bus, int devfn) return nr; } +static int pcie_find_smpss(struct pci_dev *dev, void *data) +{ + u8 *smpss = data; + + if (!pci_is_pcie(dev)) + return 0; + + /* For PCIE hotplug enabled slots not connected directly to a + * PCI-E root port, there can be problems when hotplugging + * devices. This is due to the possibility of hotplugging a + * device into the fabric with a smaller MPS that the devices + * currently running have configured. Modifying the MPS on the + * running devices could cause a fatal bus error due to an + * incoming frame being larger than the newly configured MPS. + * To work around this, the MPS for the entire fabric must be + * set to the minimum size. Any devices hotplugged into this + * fabric will have the minimum MPS set. If the PCI hotplug + * slot is directly connected to the root port and there are not + * other devices on the fabric (which seems to be the most + * common case), then this is not an issue and MPS discovery + * will occur as normal. + */ + if (dev->is_hotplug_bridge && (!list_is_singular(&dev->bus->devices) || + dev->bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT)) + *smpss = 0; + + if (*smpss > dev->pcie_mpss) + *smpss = dev->pcie_mpss; + + return 0; +} + +static void pcie_write_mps(struct pci_dev *dev, int mps) +{ + int rc, dev_mpss; + + dev_mpss = 128 << dev->pcie_mpss; + + if (pcie_bus_config == PCIE_BUS_PERFORMANCE) { + if (dev->bus->self) { + dev_dbg(&dev->bus->dev, "Bus MPSS %d\n", + 128 << dev->bus->self->pcie_mpss); + + /* For "MPS Force Max", the assumption is made that + * downstream communication will never be larger than + * the MRRS. So, the MPS only needs to be configured + * for the upstream communication. This being the case, + * walk from the top down and set the MPS of the child + * to that of the parent bus. + */ + mps = 128 << dev->bus->self->pcie_mpss; + if (mps > dev_mpss) + dev_warn(&dev->dev, "MPS configured higher than" + " maximum supported by the device. If" + " a bus issue occurs, try running with" + " pci=pcie_bus_safe.\n"); + } + + dev->pcie_mpss = ffs(mps) - 8; + } + + rc = pcie_set_mps(dev, mps); + if (rc) + dev_err(&dev->dev, "Failed attempting to set the MPS\n"); +} + +static void pcie_write_mrrs(struct pci_dev *dev, int mps) +{ + int rc, mrrs; + + if (pcie_bus_config == PCIE_BUS_PERFORMANCE) { + int dev_mpss = 128 << dev->pcie_mpss; + + /* For Max performance, the MRRS must be set to the largest + * supported value. However, it cannot be configured larger + * than the MPS the device or the bus can support. This assumes + * that the largest MRRS available on the device cannot be + * smaller than the device MPSS. + */ + mrrs = mps < dev_mpss ? mps : dev_mpss; + } else + /* In the "safe" case, configure the MRRS for fairness on the + * bus by making all devices have the same size + */ + mrrs = mps; + + + /* MRRS is a R/W register. Invalid values can be written, but a + * subsiquent read will verify if the value is acceptable or not. + * If the MRRS value provided is not acceptable (e.g., too large), + * shrink the value until it is acceptable to the HW. + */ + while (mrrs != pcie_get_readrq(dev) && mrrs >= 128) { + rc = pcie_set_readrq(dev, mrrs); + if (rc) + dev_err(&dev->dev, "Failed attempting to set the MRRS\n"); + + mrrs /= 2; + } +} + +static int pcie_bus_configure_set(struct pci_dev *dev, void *data) +{ + int mps = 128 << *(u8 *)data; + + if (!pci_is_pcie(dev)) + return 0; + + dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", + pcie_get_mps(dev), 128<pcie_mpss, pcie_get_readrq(dev)); + + pcie_write_mps(dev, mps); + pcie_write_mrrs(dev, mps); + + dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", + pcie_get_mps(dev), 128<pcie_mpss, pcie_get_readrq(dev)); + + return 0; +} + +/* pcie_bus_configure_mps requires that pci_walk_bus work in a top-down, + * parents then children fashion. If this changes, then this code will not + * work as designed. + */ +void pcie_bus_configure_settings(struct pci_bus *bus, u8 mpss) +{ + u8 smpss = mpss; + + if (!bus->self) + return; + + if (!pci_is_pcie(bus->self)) + return; + + if (pcie_bus_config == PCIE_BUS_SAFE) { + pcie_find_smpss(bus->self, &smpss); + pci_walk_bus(bus, pcie_find_smpss, &smpss); + } + + pcie_bus_configure_set(bus->self, &smpss); + pci_walk_bus(bus, pcie_bus_configure_set, &smpss); +} + unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) { unsigned int devfn, pass, max = bus->secondary; diff --git a/include/linux/pci.h b/include/linux/pci.h index f27893b..1ff9bba 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -251,7 +251,8 @@ struct pci_dev { u8 revision; /* PCI revision, low byte of class word */ u8 hdr_type; /* PCI header type (`multi' flag masked out) */ u8 pcie_cap; /* PCI-E capability offset */ - u8 pcie_type; /* PCI-E device/port type */ + u8 pcie_type:4; /* PCI-E device/port type */ + u8 pcie_mpss:3; /* PCI-E Max Payload Size Supported */ u8 rom_base_reg; /* which config register controls the ROM */ u8 pin; /* which interrupt pin this device uses */ @@ -617,6 +618,16 @@ struct pci_driver { /* these external functions are only available when PCI support is enabled */ #ifdef CONFIG_PCI +extern void pcie_bus_configure_settings(struct pci_bus *bus, u8 smpss); + +enum pcie_bus_config_types { + PCIE_BUS_PERFORMANCE, + PCIE_BUS_SAFE, + PCIE_BUS_PEER2PEER, +}; + +extern enum pcie_bus_config_types pcie_bus_config; + extern struct bus_type pci_bus_type; /* Do NOT directly access these two variables, unless you are arch specific pci @@ -796,6 +807,8 @@ int pcix_get_mmrbc(struct pci_dev *dev); int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc); int pcie_get_readrq(struct pci_dev *dev); int pcie_set_readrq(struct pci_dev *dev, int rq); +int pcie_get_mps(struct pci_dev *dev); +int pcie_set_mps(struct pci_dev *dev, int mps); int __pci_reset_function(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); -- cgit v1.1 From be768912a49b10b68e96fbd8fa3cab0adfbd3091 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 25 Jul 2011 13:08:38 -0700 Subject: PCI: honor child buses add_size in hot plug configuration git commit c8adf9a3e873eddaaec11ac410a99ef6b9656938 "PCI: pre-allocate additional resources to devices only after successful allocation of essential resources." fails to take into consideration the optional-resources needed by children devices while calculating the optional-resource needed by the bridge. This can be a problem on some setup. For example, if a hotplug bridge has 8 children hotplug bridges, the bridge should have enough resources to accomodate the hotplug requirements for each of its children hotplug bridges. Currently this is not the case. This patch fixes the problem. Signed-off-by: Yinghai Lu Reviewed-by: Ram Pai Signed-off-by: Jesse Barnes --- drivers/pci/setup-bus.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 8a1d3c7..4409cd0 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -540,6 +540,20 @@ static resource_size_t calculate_memsize(resource_size_t size, return size; } +static resource_size_t get_res_add_size(struct resource_list_x *add_head, + struct resource *res) +{ + struct resource_list_x *list; + + /* check if it is in add_head list */ + for (list = add_head->next; list && list->res != res; + list = list->next); + if (list) + return list->add_size; + + return 0; +} + /** * pbus_size_io() - size the io window of a given bus * @@ -559,6 +573,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, struct pci_dev *dev; struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO); unsigned long size = 0, size0 = 0, size1 = 0; + resource_size_t children_add_size = 0; if (!b_res) return; @@ -579,10 +594,15 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, size += r_size; else size1 += r_size; + + if (add_head) + children_add_size += get_res_add_size(add_head, r); } } size0 = calculate_iosize(size, min_size, size1, resource_size(b_res), 4096); + if (children_add_size > add_size) + add_size = children_add_size; size1 = (!add_head || (add_head && !add_size)) ? size0 : calculate_iosize(size, min_size+add_size, size1, resource_size(b_res), 4096); @@ -624,6 +644,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, int order, max_order; struct resource *b_res = find_free_bus_resource(bus, type); unsigned int mem64_mask = 0; + resource_size_t children_add_size = 0; if (!b_res) return 0; @@ -665,6 +686,9 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, if (order > max_order) max_order = order; mem64_mask &= r->flags & IORESOURCE_MEM_64; + + if (add_head) + children_add_size += get_res_add_size(add_head, r); } } align = 0; @@ -681,6 +705,8 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, align += aligns[order]; } size0 = calculate_memsize(size, min_size, 0, resource_size(b_res), min_align); + if (children_add_size > add_size) + add_size = children_add_size; size1 = (!add_head || (add_head && !add_size)) ? size0 : calculate_memsize(size, min_size+add_size, 0, resource_size(b_res), min_align); -- cgit v1.1 From 2bbc6942273b5b3097bd265d82227bdd84b351b2 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 25 Jul 2011 13:08:39 -0700 Subject: PCI : ability to relocate assigned pci-resources Currently pci-bridges are allocated enough resources to satisfy their immediate requirements. Any additional resource-requests fail if additional free space, contiguous to the one already allocated, is not available. This behavior is not reasonable since sufficient contiguous resources, that can satisfy the request, are available at a different location. This patch provides the ability to expand and relocate a allocated resource. v2: Changelog: Fixed size calculation in pci_reassign_resource() v3: Changelog : Split this patch. The resource.c changes are already upstream. All the pci driver changes are in here. Signed-off-by: Ram Pai Signed-off-by: Jesse Barnes --- drivers/pci/setup-bus.c | 27 +++++---- drivers/pci/setup-res.c | 152 ++++++++++++++++++++++++++++++------------------ include/linux/pci.h | 1 + 3 files changed, 113 insertions(+), 67 deletions(-) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 4409cd0..1796c6f 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -34,6 +34,7 @@ struct resource_list_x { resource_size_t start; resource_size_t end; resource_size_t add_size; + resource_size_t min_align; unsigned long flags; }; @@ -65,7 +66,7 @@ void pci_realloc(void) */ static void add_to_list(struct resource_list_x *head, struct pci_dev *dev, struct resource *res, - resource_size_t add_size) + resource_size_t add_size, resource_size_t min_align) { struct resource_list_x *list = head; struct resource_list_x *ln = list->next; @@ -84,13 +85,16 @@ static void add_to_list(struct resource_list_x *head, tmp->end = res->end; tmp->flags = res->flags; tmp->add_size = add_size; + tmp->min_align = min_align; list->next = tmp; } static void add_to_failed_list(struct resource_list_x *head, struct pci_dev *dev, struct resource *res) { - add_to_list(head, dev, res, 0); + add_to_list(head, dev, res, + 0 /* dont care */, + 0 /* dont care */); } static void __dev_sort_resources(struct pci_dev *dev, @@ -159,13 +163,16 @@ static void adjust_resources_sorted(struct resource_list_x *add_head, idx = res - &list->dev->resource[0]; add_size=list->add_size; - if (!resource_size(res) && add_size) { - res->end = res->start + add_size - 1; - if(pci_assign_resource(list->dev, idx)) + if (!resource_size(res)) { + res->end = res->start + add_size - 1; + if(pci_assign_resource(list->dev, idx)) reset_resource(res); - } else if (add_size) { - adjust_resource(res, res->start, - resource_size(res) + add_size); + } else { + resource_size_t align = list->min_align; + res->flags |= list->flags & (IORESOURCE_STARTALIGN|IORESOURCE_SIZEALIGN); + if (pci_reassign_resource(list->dev, idx, add_size, align)) + dev_printk(KERN_DEBUG, &list->dev->dev, "failed to add optional resources res=%pR\n", + res); } out: tmp = list; @@ -619,7 +626,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, b_res->end = b_res->start + size0 - 1; b_res->flags |= IORESOURCE_STARTALIGN; if (size1 > size0 && add_head) - add_to_list(add_head, bus->self, b_res, size1-size0); + add_to_list(add_head, bus->self, b_res, size1-size0, 4096); } /** @@ -722,7 +729,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, b_res->end = size0 + min_align - 1; b_res->flags |= IORESOURCE_STARTALIGN | mem64_mask; if (size1 > size0 && add_head) - add_to_list(add_head, bus->self, b_res, size1-size0); + add_to_list(add_head, bus->self, b_res, size1-size0, min_align); return 1; } diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 319f359..51a9095 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -128,16 +128,16 @@ void pci_disable_bridge_window(struct pci_dev *dev) } #endif /* CONFIG_PCI_QUIRKS */ + + static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, - int resno) + int resno, resource_size_t size, resource_size_t align) { struct resource *res = dev->resource + resno; - resource_size_t size, min, align; + resource_size_t min; int ret; - size = resource_size(res); min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM; - align = pci_resource_alignment(dev, res); /* First, try exact prefetching match.. */ ret = pci_bus_alloc_resource(bus, res, size, align, min, @@ -154,56 +154,101 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, ret = pci_bus_alloc_resource(bus, res, size, align, min, 0, pcibios_align_resource, dev); } + return ret; +} - if (ret < 0 && dev->fw_addr[resno]) { - struct resource *root, *conflict; - resource_size_t start, end; +static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev, + int resno, resource_size_t size) +{ + struct resource *root, *conflict; + resource_size_t start, end; + int ret = 0; - /* - * If we failed to assign anything, let's try the address - * where firmware left it. That at least has a chance of - * working, which is better than just leaving it disabled. - */ + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + root = &iomem_resource; + + start = res->start; + end = res->end; + res->start = dev->fw_addr[resno]; + res->end = res->start + size - 1; + dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n", + resno, res); + conflict = request_resource_conflict(root, res); + if (conflict) { + dev_info(&dev->dev, + "BAR %d: %pR conflicts with %s %pR\n", resno, + res, conflict->name, conflict); + res->start = start; + res->end = end; + ret = 1; + } + return ret; +} + +static int _pci_assign_resource(struct pci_dev *dev, int resno, int size, resource_size_t min_align) +{ + struct resource *res = dev->resource + resno; + struct pci_bus *bus; + int ret; + char *type; - if (res->flags & IORESOURCE_IO) - root = &ioport_resource; + bus = dev->bus; + while ((ret = __pci_assign_resource(bus, dev, resno, size, min_align))) { + if (!bus->parent || !bus->self->transparent) + break; + bus = bus->parent; + } + + if (ret) { + if (res->flags & IORESOURCE_MEM) + if (res->flags & IORESOURCE_PREFETCH) + type = "mem pref"; + else + type = "mem"; + else if (res->flags & IORESOURCE_IO) + type = "io"; else - root = &iomem_resource; - - start = res->start; - end = res->end; - res->start = dev->fw_addr[resno]; - res->end = res->start + size - 1; - dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n", - resno, res); - conflict = request_resource_conflict(root, res); - if (conflict) { - dev_info(&dev->dev, - "BAR %d: %pR conflicts with %s %pR\n", resno, - res, conflict->name, conflict); - res->start = start; - res->end = end; - } else - ret = 0; + type = "unknown"; + dev_info(&dev->dev, + "BAR %d: can't assign %s (size %#llx)\n", + resno, type, (unsigned long long) resource_size(res)); } + return ret; +} + +int pci_reassign_resource(struct pci_dev *dev, int resno, resource_size_t addsize, + resource_size_t min_align) +{ + struct resource *res = dev->resource + resno; + resource_size_t new_size; + int ret; + + if (!res->parent) { + dev_info(&dev->dev, "BAR %d: can't reassign an unassigned resouce %pR " + "\n", resno, res); + return -EINVAL; + } + + new_size = resource_size(res) + addsize + min_align; + ret = _pci_assign_resource(dev, resno, new_size, min_align); if (!ret) { res->flags &= ~IORESOURCE_STARTALIGN; dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res); if (resno < PCI_BRIDGE_RESOURCES) pci_update_resource(dev, resno); } - return ret; } int pci_assign_resource(struct pci_dev *dev, int resno) { struct resource *res = dev->resource + resno; - resource_size_t align; + resource_size_t align, size; struct pci_bus *bus; int ret; - char *type; align = pci_resource_alignment(dev, res); if (!align) { @@ -213,34 +258,27 @@ int pci_assign_resource(struct pci_dev *dev, int resno) } bus = dev->bus; - while ((ret = __pci_assign_resource(bus, dev, resno))) { - if (bus->parent && bus->self->transparent) - bus = bus->parent; - else - bus = NULL; - if (bus) - continue; - break; - } + size = resource_size(res); + ret = _pci_assign_resource(dev, resno, size, align); - if (ret) { - if (res->flags & IORESOURCE_MEM) - if (res->flags & IORESOURCE_PREFETCH) - type = "mem pref"; - else - type = "mem"; - else if (res->flags & IORESOURCE_IO) - type = "io"; - else - type = "unknown"; - dev_info(&dev->dev, - "BAR %d: can't assign %s (size %#llx)\n", - resno, type, (unsigned long long) resource_size(res)); - } + /* + * If we failed to assign anything, let's try the address + * where firmware left it. That at least has a chance of + * working, which is better than just leaving it disabled. + */ + if (ret < 0 && dev->fw_addr[resno]) + ret = pci_revert_fw_address(res, dev, resno, size); + if (!ret) { + res->flags &= ~IORESOURCE_STARTALIGN; + dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res); + if (resno < PCI_BRIDGE_RESOURCES) + pci_update_resource(dev, resno); + } return ret; } + /* Sort resources by alignment */ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) { diff --git a/include/linux/pci.h b/include/linux/pci.h index 1ff9bba..8c230cb 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -813,6 +813,7 @@ int __pci_reset_function(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); +int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align); int pci_select_bars(struct pci_dev *dev, unsigned long flags); /* ROM control related routines */ -- cgit v1.1 From 2aceefcbd5a73059e5f52831817ec277e987440d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 25 Jul 2011 13:08:40 -0700 Subject: PCI: make SRIOV resources optional From: Yinghai Lu Allocate resources to SRIOV BARs only after all other required resource-requests are satisfied. Dont retry if resource allocation for SRIOV BARs fail. Signed-off-by: Ram Pai Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- drivers/pci/setup-bus.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 1796c6f..1c19b9f 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -673,6 +673,16 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, if (r->parent || (r->flags & mask) != type) continue; r_size = resource_size(r); +#ifdef CONFIG_PCI_IOV + /* put SRIOV requested res to the optional list */ + if (add_head && i >= PCI_IOV_RESOURCES && + i <= PCI_IOV_RESOURCE_END) { + r->end = r->start - 1; + add_to_list(add_head, dev, r, r_size, 1); + children_add_size += r_size; + continue; + } +#endif /* For bridges size != alignment */ align = pci_resource_alignment(dev, r); order = __ffs(align) - 20; -- cgit v1.1 From 0a2daa1cf35004f5adbf4138555cc5669abf3a3e Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 25 Jul 2011 13:08:41 -0700 Subject: PCI: make cardbus-bridge resources optional Allocate resources to cardbus bridge only after all other genuine resources requests are satisfied. Dont retry if resource allocation for cardbus-bridges fail. Signed-off-by: Ram Pai Signed-off-by: Jesse Barnes --- drivers/pci/pci.h | 4 ++++ drivers/pci/setup-bus.c | 41 ++++++++++++++++++++++++++++++++--------- 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index c8cee76..b74084e 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -283,6 +283,8 @@ static inline int pci_iov_bus_range(struct pci_bus *bus) #endif /* CONFIG_PCI_IOV */ +extern unsigned long pci_cardbus_resource_alignment(struct resource *); + static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, struct resource *res) { @@ -292,6 +294,8 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, if (resno >= PCI_IOV_RESOURCES && resno <= PCI_IOV_RESOURCE_END) return pci_sriov_resource_alignment(dev, resno); #endif + if (dev->class >> 8 == PCI_CLASS_BRIDGE_CARDBUS) + return pci_cardbus_resource_alignment(res); return resource_alignment(res); } diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 1c19b9f..29e7cc7 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -164,6 +164,7 @@ static void adjust_resources_sorted(struct resource_list_x *add_head, idx = res - &list->dev->resource[0]; add_size=list->add_size; if (!resource_size(res)) { + res->start = list->start; res->end = res->start + add_size - 1; if(pci_assign_resource(list->dev, idx)) reset_resource(res); @@ -223,7 +224,7 @@ static void __assign_resources_sorted(struct resource_list *head, /* Satisfy the must-have resource requests */ assign_requested_resources_sorted(head, fail_head); - /* Try to satisfy any additional nice-to-have resource + /* Try to satisfy any additional optional resource requests */ if (add_head) adjust_resources_sorted(add_head, head); @@ -678,7 +679,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, if (add_head && i >= PCI_IOV_RESOURCES && i <= PCI_IOV_RESOURCE_END) { r->end = r->start - 1; - add_to_list(add_head, dev, r, r_size, 1); + add_to_list(add_head, dev, r, r_size, 0/* dont' care */); children_add_size += r_size; continue; } @@ -743,7 +744,17 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, return 1; } -static void pci_bus_size_cardbus(struct pci_bus *bus) +unsigned long pci_cardbus_resource_alignment(struct resource *res) +{ + if (res->flags & IORESOURCE_IO) + return pci_cardbus_io_size; + if (res->flags & IORESOURCE_MEM) + return pci_cardbus_mem_size; + return 0; +} + +static void pci_bus_size_cardbus(struct pci_bus *bus, + struct resource_list_x *add_head) { struct pci_dev *bridge = bus->self; struct resource *b_res = &bridge->resource[PCI_BRIDGE_RESOURCES]; @@ -754,12 +765,14 @@ static void pci_bus_size_cardbus(struct pci_bus *bus) * a fixed amount of bus space for CardBus bridges. */ b_res[0].start = 0; - b_res[0].end = pci_cardbus_io_size - 1; b_res[0].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN; + if (add_head) + add_to_list(add_head, bridge, b_res, pci_cardbus_io_size, 0 /* dont care */); b_res[1].start = 0; - b_res[1].end = pci_cardbus_io_size - 1; b_res[1].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN; + if (add_head) + add_to_list(add_head, bridge, b_res+1, pci_cardbus_io_size, 0 /* dont care */); /* * Check whether prefetchable memory is supported @@ -779,17 +792,27 @@ static void pci_bus_size_cardbus(struct pci_bus *bus) */ if (ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM0) { b_res[2].start = 0; - b_res[2].end = pci_cardbus_mem_size - 1; b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_SIZEALIGN; + if (add_head) + add_to_list(add_head, bridge, b_res+2, pci_cardbus_mem_size, 0 /* dont care */); b_res[3].start = 0; - b_res[3].end = pci_cardbus_mem_size - 1; b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN; + if (add_head) + add_to_list(add_head, bridge, b_res+3, pci_cardbus_mem_size, 0 /* dont care */); } else { b_res[3].start = 0; - b_res[3].end = pci_cardbus_mem_size * 2 - 1; b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN; + if (add_head) + add_to_list(add_head, bridge, b_res+3, pci_cardbus_mem_size * 2, 0 /* dont care */); } + + /* set the size of the resource to zero, so that the resource does not + * get assigned during required-resource allocation cycle but gets assigned + * during the optional-resource allocation cycle. + */ + b_res[0].start = b_res[1].start = b_res[2].start = b_res[3].start = 1; + b_res[0].end = b_res[1].end = b_res[2].end = b_res[3].end = 0; } void __ref __pci_bus_size_bridges(struct pci_bus *bus, @@ -806,7 +829,7 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus, switch (dev->class >> 8) { case PCI_CLASS_BRIDGE_CARDBUS: - pci_bus_size_cardbus(b); + pci_bus_size_cardbus(b, add_head); break; case PCI_CLASS_BRIDGE_PCI: -- cgit v1.1 From 9e8bf93a7f416a3fa8fb6d76177d90e67bd45496 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 25 Jul 2011 13:08:42 -0700 Subject: PCI: code and comments cleanup a) adjust_resource_sorted() is now called reassign_resource_sorted() b) nice-to-have is now called optional c) add_list is now called realloc_list. Signed-off-by: Ram Pai Signed-off-by: Jesse Barnes --- drivers/pci/setup-bus.c | 110 ++++++++++++++++++++++++------------------------ 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 29e7cc7..784da9d 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -125,18 +125,18 @@ static inline void reset_resource(struct resource *res) } /** - * adjust_resources_sorted() - satisfy any additional resource requests + * reassign_resources_sorted() - satisfy any additional resource requests * - * @add_head : head of the list tracking requests requiring additional + * @realloc_head : head of the list tracking requests requiring additional * resources * @head : head of the list tracking requests with allocated * resources * - * Walk through each element of the add_head and try to procure + * Walk through each element of the realloc_head and try to procure * additional resources for the element, provided the element * is in the head list. */ -static void adjust_resources_sorted(struct resource_list_x *add_head, +static void reassign_resources_sorted(struct resource_list_x *realloc_head, struct resource_list *head) { struct resource *res; @@ -145,8 +145,8 @@ static void adjust_resources_sorted(struct resource_list_x *add_head, resource_size_t add_size; int idx; - prev = add_head; - for (list = add_head->next; list;) { + prev = realloc_head; + for (list = realloc_head->next; list;) { res = list->res; /* skip resource that has been reset */ if (!res->flags) @@ -218,7 +218,7 @@ static void assign_requested_resources_sorted(struct resource_list *head, } static void __assign_resources_sorted(struct resource_list *head, - struct resource_list_x *add_head, + struct resource_list_x *realloc_head, struct resource_list_x *fail_head) { /* Satisfy the must-have resource requests */ @@ -226,8 +226,8 @@ static void __assign_resources_sorted(struct resource_list *head, /* Try to satisfy any additional optional resource requests */ - if (add_head) - adjust_resources_sorted(add_head, head); + if (realloc_head) + reassign_resources_sorted(realloc_head, head); free_list(resource_list, head); } @@ -243,7 +243,7 @@ static void pdev_assign_resources_sorted(struct pci_dev *dev, } static void pbus_assign_resources_sorted(const struct pci_bus *bus, - struct resource_list_x *add_head, + struct resource_list_x *realloc_head, struct resource_list_x *fail_head) { struct pci_dev *dev; @@ -253,7 +253,7 @@ static void pbus_assign_resources_sorted(const struct pci_bus *bus, list_for_each_entry(dev, &bus->devices, bus_list) __dev_sort_resources(dev, &head); - __assign_resources_sorted(&head, add_head, fail_head); + __assign_resources_sorted(&head, realloc_head, fail_head); } void pci_setup_cardbus(struct pci_bus *bus) @@ -548,13 +548,13 @@ static resource_size_t calculate_memsize(resource_size_t size, return size; } -static resource_size_t get_res_add_size(struct resource_list_x *add_head, +static resource_size_t get_res_add_size(struct resource_list_x *realloc_head, struct resource *res) { struct resource_list_x *list; - /* check if it is in add_head list */ - for (list = add_head->next; list && list->res != res; + /* check if it is in realloc_head list */ + for (list = realloc_head->next; list && list->res != res; list = list->next); if (list) return list->add_size; @@ -568,7 +568,7 @@ static resource_size_t get_res_add_size(struct resource_list_x *add_head, * @bus : the bus * @min_size : the minimum io window that must to be allocated * @add_size : additional optional io window - * @add_head : track the additional io window on this list + * @realloc_head : track the additional io window on this list * * Sizing the IO windows of the PCI-PCI bridge is trivial, * since these windows have 4K granularity and the IO ranges @@ -576,7 +576,7 @@ static resource_size_t get_res_add_size(struct resource_list_x *add_head, * We must be careful with the ISA aliasing though. */ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, - resource_size_t add_size, struct resource_list_x *add_head) + resource_size_t add_size, struct resource_list_x *realloc_head) { struct pci_dev *dev; struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO); @@ -603,15 +603,15 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, else size1 += r_size; - if (add_head) - children_add_size += get_res_add_size(add_head, r); + if (realloc_head) + children_add_size += get_res_add_size(realloc_head, r); } } size0 = calculate_iosize(size, min_size, size1, resource_size(b_res), 4096); if (children_add_size > add_size) add_size = children_add_size; - size1 = (!add_head || (add_head && !add_size)) ? size0 : + size1 = (!realloc_head || (realloc_head && !add_size)) ? size0 : calculate_iosize(size, min_size+add_size, size1, resource_size(b_res), 4096); if (!size0 && !size1) { @@ -626,8 +626,8 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, b_res->start = 4096; b_res->end = b_res->start + size0 - 1; b_res->flags |= IORESOURCE_STARTALIGN; - if (size1 > size0 && add_head) - add_to_list(add_head, bus->self, b_res, size1-size0, 4096); + if (size1 > size0 && realloc_head) + add_to_list(realloc_head, bus->self, b_res, size1-size0, 4096); } /** @@ -636,7 +636,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, * @bus : the bus * @min_size : the minimum memory window that must to be allocated * @add_size : additional optional memory window - * @add_head : track the additional memory window on this list + * @realloc_head : track the additional memory window on this list * * Calculate the size of the bus and minimal alignment which * guarantees that all child resources fit in this size. @@ -644,7 +644,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long type, resource_size_t min_size, resource_size_t add_size, - struct resource_list_x *add_head) + struct resource_list_x *realloc_head) { struct pci_dev *dev; resource_size_t min_align, align, size, size0, size1; @@ -676,10 +676,10 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, r_size = resource_size(r); #ifdef CONFIG_PCI_IOV /* put SRIOV requested res to the optional list */ - if (add_head && i >= PCI_IOV_RESOURCES && + if (realloc_head && i >= PCI_IOV_RESOURCES && i <= PCI_IOV_RESOURCE_END) { r->end = r->start - 1; - add_to_list(add_head, dev, r, r_size, 0/* dont' care */); + add_to_list(realloc_head, dev, r, r_size, 0/* dont' care */); children_add_size += r_size; continue; } @@ -705,8 +705,8 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, max_order = order; mem64_mask &= r->flags & IORESOURCE_MEM_64; - if (add_head) - children_add_size += get_res_add_size(add_head, r); + if (realloc_head) + children_add_size += get_res_add_size(realloc_head, r); } } align = 0; @@ -725,7 +725,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, size0 = calculate_memsize(size, min_size, 0, resource_size(b_res), min_align); if (children_add_size > add_size) add_size = children_add_size; - size1 = (!add_head || (add_head && !add_size)) ? size0 : + size1 = (!realloc_head || (realloc_head && !add_size)) ? size0 : calculate_memsize(size, min_size+add_size, 0, resource_size(b_res), min_align); if (!size0 && !size1) { @@ -739,8 +739,8 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, b_res->start = min_align; b_res->end = size0 + min_align - 1; b_res->flags |= IORESOURCE_STARTALIGN | mem64_mask; - if (size1 > size0 && add_head) - add_to_list(add_head, bus->self, b_res, size1-size0, min_align); + if (size1 > size0 && realloc_head) + add_to_list(realloc_head, bus->self, b_res, size1-size0, min_align); return 1; } @@ -754,7 +754,7 @@ unsigned long pci_cardbus_resource_alignment(struct resource *res) } static void pci_bus_size_cardbus(struct pci_bus *bus, - struct resource_list_x *add_head) + struct resource_list_x *realloc_head) { struct pci_dev *bridge = bus->self; struct resource *b_res = &bridge->resource[PCI_BRIDGE_RESOURCES]; @@ -766,13 +766,13 @@ static void pci_bus_size_cardbus(struct pci_bus *bus, */ b_res[0].start = 0; b_res[0].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN; - if (add_head) - add_to_list(add_head, bridge, b_res, pci_cardbus_io_size, 0 /* dont care */); + if (realloc_head) + add_to_list(realloc_head, bridge, b_res, pci_cardbus_io_size, 0 /* dont care */); b_res[1].start = 0; b_res[1].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN; - if (add_head) - add_to_list(add_head, bridge, b_res+1, pci_cardbus_io_size, 0 /* dont care */); + if (realloc_head) + add_to_list(realloc_head, bridge, b_res+1, pci_cardbus_io_size, 0 /* dont care */); /* * Check whether prefetchable memory is supported @@ -793,18 +793,18 @@ static void pci_bus_size_cardbus(struct pci_bus *bus, if (ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM0) { b_res[2].start = 0; b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_SIZEALIGN; - if (add_head) - add_to_list(add_head, bridge, b_res+2, pci_cardbus_mem_size, 0 /* dont care */); + if (realloc_head) + add_to_list(realloc_head, bridge, b_res+2, pci_cardbus_mem_size, 0 /* dont care */); b_res[3].start = 0; b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN; - if (add_head) - add_to_list(add_head, bridge, b_res+3, pci_cardbus_mem_size, 0 /* dont care */); + if (realloc_head) + add_to_list(realloc_head, bridge, b_res+3, pci_cardbus_mem_size, 0 /* dont care */); } else { b_res[3].start = 0; b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN; - if (add_head) - add_to_list(add_head, bridge, b_res+3, pci_cardbus_mem_size * 2, 0 /* dont care */); + if (realloc_head) + add_to_list(realloc_head, bridge, b_res+3, pci_cardbus_mem_size * 2, 0 /* dont care */); } /* set the size of the resource to zero, so that the resource does not @@ -816,7 +816,7 @@ static void pci_bus_size_cardbus(struct pci_bus *bus, } void __ref __pci_bus_size_bridges(struct pci_bus *bus, - struct resource_list_x *add_head) + struct resource_list_x *realloc_head) { struct pci_dev *dev; unsigned long mask, prefmask; @@ -829,12 +829,12 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus, switch (dev->class >> 8) { case PCI_CLASS_BRIDGE_CARDBUS: - pci_bus_size_cardbus(b, add_head); + pci_bus_size_cardbus(b, realloc_head); break; case PCI_CLASS_BRIDGE_PCI: default: - __pci_bus_size_bridges(b, add_head); + __pci_bus_size_bridges(b, realloc_head); break; } } @@ -858,7 +858,7 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus, * Follow thru */ default: - pbus_size_io(bus, 0, additional_io_size, add_head); + pbus_size_io(bus, 0, additional_io_size, realloc_head); /* If the bridge supports prefetchable range, size it separately. If it doesn't, or its prefetchable window has already been allocated by arch code, try @@ -866,11 +866,11 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus, resources. */ mask = IORESOURCE_MEM; prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH; - if (pbus_size_mem(bus, prefmask, prefmask, 0, additional_mem_size, add_head)) + if (pbus_size_mem(bus, prefmask, prefmask, 0, additional_mem_size, realloc_head)) mask = prefmask; /* Success, size non-prefetch only. */ else additional_mem_size += additional_mem_size; - pbus_size_mem(bus, mask, IORESOURCE_MEM, 0, additional_mem_size, add_head); + pbus_size_mem(bus, mask, IORESOURCE_MEM, 0, additional_mem_size, realloc_head); break; } } @@ -882,20 +882,20 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus) EXPORT_SYMBOL(pci_bus_size_bridges); static void __ref __pci_bus_assign_resources(const struct pci_bus *bus, - struct resource_list_x *add_head, + struct resource_list_x *realloc_head, struct resource_list_x *fail_head) { struct pci_bus *b; struct pci_dev *dev; - pbus_assign_resources_sorted(bus, add_head, fail_head); + pbus_assign_resources_sorted(bus, realloc_head, fail_head); list_for_each_entry(dev, &bus->devices, bus_list) { b = dev->subordinate; if (!b) continue; - __pci_bus_assign_resources(b, add_head, fail_head); + __pci_bus_assign_resources(b, realloc_head, fail_head); switch (dev->class >> 8) { case PCI_CLASS_BRIDGE_PCI: @@ -1105,7 +1105,7 @@ void __init pci_assign_unassigned_resources(void) { struct pci_bus *bus; - struct resource_list_x add_list; /* list of resources that + struct resource_list_x realloc_list; /* list of resources that want additional resources */ int tried_times = 0; enum release_type rel_type = leaf_only; @@ -1118,7 +1118,7 @@ pci_assign_unassigned_resources(void) head.next = NULL; - add_list.next = NULL; + realloc_list.next = NULL; pci_try_num = max_depth + 1; printk(KERN_DEBUG "PCI: max bus depth: %d pci_try_num: %d\n", @@ -1128,12 +1128,12 @@ again: /* Depth first, calculate sizes and alignments of all subordinate buses. */ list_for_each_entry(bus, &pci_root_buses, node) - __pci_bus_size_bridges(bus, &add_list); + __pci_bus_size_bridges(bus, &realloc_list); /* Depth last, allocate resources and update the hardware. */ list_for_each_entry(bus, &pci_root_buses, node) - __pci_bus_assign_resources(bus, &add_list, &head); - BUG_ON(add_list.next); + __pci_bus_assign_resources(bus, &realloc_list, &head); + BUG_ON(realloc_list.next); tried_times++; /* any device complain? */ -- cgit v1.1 From 4a4c879904aa0cc64629e14a49b64fb3d149bf1a Mon Sep 17 00:00:00 2001 From: Dan Bastone Date: Sun, 31 Jul 2011 07:40:49 -0400 Subject: HID: add support for new revision of Apple aluminum keyboard Add USB device ids for the new revision (MB110LL/B) of Apple's wired aluminum keyboard. I have only confirmed that the ANSI version is correct - it is assumed that the ISO and JIS versions follow the standard numbering convention. Signed-off-by: Dan Bastone Signed-off-by: Jiri Kosina --- drivers/hid/hid-apple.c | 6 ++++++ drivers/hid/hid-core.c | 3 +++ drivers/hid/hid-ids.h | 3 +++ 3 files changed, 12 insertions(+) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index b85744f..18b3bc6 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -444,6 +444,12 @@ static const struct hid_device_id apple_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN | APPLE_RDESC_JIS }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI), + .driver_data = APPLE_HAS_FN }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO), + .driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_JIS), + .driver_data = APPLE_HAS_FN }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO), diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 1a5cf0c..242353d 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1340,6 +1340,9 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ISO) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_JIS) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_JIS) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index db63ccf..61c8809 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -109,6 +109,9 @@ #define USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI 0x0245 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ISO 0x0246 #define USB_DEVICE_ID_APPLE_WELLSPRING5_JIS 0x0247 +#define USB_DEVICE_ID_APPLE_ALU_REVB_ANSI 0x024f +#define USB_DEVICE_ID_APPLE_ALU_REVB_ISO 0x0250 +#define USB_DEVICE_ID_APPLE_ALU_REVB_JIS 0x0251 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI 0x0239 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO 0x023a #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS 0x023b -- cgit v1.1 From a5395b83b78f62ccf5e3af854aacd025c2a6e7b5 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 2 Aug 2011 09:24:09 +0200 Subject: cfq-iosched: Reduce linked group count upon group destruction FQ keeps track of number of groups which are linked on blkcg->blkg_list. This is useful to avoid races between queue exit and cgroup exit code paths. So if at the request queue exit time linked group count is not zero, that means there are some group out there which is yet to be deleted under rcu read period and queue exit code should wait for on rcu period. In my previous patch I forgot to decrease the number of group count. So in current form, we nr_blkcg_linked_grps is always non-zero and we will always wait one rcu period (if BLK_CGROUP=y). The side effect of this is that it can increase boot time. I am surprised, nobody complained so far. Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 1f96ad6..6508345 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1209,6 +1209,9 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg) hlist_del_init(&cfqg->cfqd_node); + BUG_ON(cfqd->nr_blkcg_linked_grps <= 0); + cfqd->nr_blkcg_linked_grps--; + /* * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. -- cgit v1.1 From e2a5429ff7947ad251310376384f449297b7492a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 2 Aug 2011 10:43:35 +0200 Subject: bsg-lib: add module.h include Due to conflicts with the moduleh tree in linux-next, we run into an include file mess. We really need export.h in that tree, but if we add module.h locally then the issue is easier to resolve. Reported-by: Stephen Rothwell Signed-off-by: Jens Axboe --- block/bsg-lib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/bsg-lib.c b/block/bsg-lib.c index f8c0a61..6690e6e 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -25,6 +25,7 @@ #include #include #include +#include #include /** -- cgit v1.1 From ddad9ef5826efdfbbdb67b13b46f30e43e46ec3e Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Tue, 2 Aug 2011 12:43:49 +0200 Subject: drivers/block/drbd/drbd_nl.c: use bitmap_parse instead of __bitmap_parse The buffer 'sc.cpu_mask' is a kernel buffer. If bitmap_parse is used instead of __bitmap_parse the extra parameter that indicates a kernel buffer is not needed. Signed-off-by: H Hartley Sweeten Cc: Lars Ellenberg Cc: Philipp Reisner Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 515bcd9..0feab26 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1829,10 +1829,10 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n /* silently ignore cpu mask on UP kernel */ if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) { - err = __bitmap_parse(sc.cpu_mask, 32, 0, + err = bitmap_parse(sc.cpu_mask, 32, cpumask_bits(new_cpu_mask), nr_cpu_ids); if (err) { - dev_warn(DEV, "__bitmap_parse() failed with %d\n", err); + dev_warn(DEV, "bitmap_parse() failed with %d\n", err); retcode = ERR_CPU_MASK_PARSE; goto fail; } -- cgit v1.1 From aec9f377e4f235c47e27fd8a429555dfa2dda342 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 2 Aug 2011 12:43:50 +0200 Subject: drivers/cdrom/cdrom.c: relax check on dvd manufacturer value The report has an ISO which has a very long manufacturer ID. It seems that Linux is wrong, not the ISO maker. Relax the check for the length of this field: emit a warning and truncate the incoming data to 2048 bytes rather than rejecting the entire thing. dvd_manufact.value isn't null-terminated. I'm not even sure if it's a string. The kernel doesn't apepar to use it anyway. Addresses https://bugzilla.kernel.org/show_bug.cgi?id=39062 Reported-by: Tested-by: Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/cdrom/cdrom.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 75fb965..f997c27 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -1929,11 +1929,17 @@ static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s, goto out; s->manufact.len = buf[0] << 8 | buf[1]; - if (s->manufact.len < 0 || s->manufact.len > 2048) { + if (s->manufact.len < 0) { cdinfo(CD_WARNING, "Received invalid manufacture info length" " (%d)\n", s->manufact.len); ret = -EIO; } else { + if (s->manufact.len > 2048) { + cdinfo(CD_WARNING, "Received invalid manufacture info " + "length (%d): truncating to 2048\n", + s->manufact.len); + s->manufact.len = 2048; + } memcpy(s->manufact.value, &buf[4], s->manufact.len); } -- cgit v1.1 From f95fe9cfb49f6e625fbb5888cae2ed6f3a276b89 Mon Sep 17 00:00:00 2001 From: Herbert Poetzl Date: Tue, 2 Aug 2011 12:43:50 +0200 Subject: block/genhd.c: remove useless cast in diskstats_show() Remove the (unsigned long long) cast in diskstats_show() and adjusts the seq_printf() format string to 'unsigned long' diskstats_show() uses part_stat_read() to get the stats, which either accesses the specified field in the struct disk_stats directly (non SMP) or sums up the per CPU values in a variable of the same type as the field, so in any case the result will have the same type and range as the specified field which for all disk_stats entries is unsigned long Also, for unsigned long ranges the output of %lu should be identical to the one of %llu, so no change in the actual proc entry contents. Signed-off-by: Herbert Poetzl Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- block/genhd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 5cb51c5..e2f6790 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1146,17 +1146,17 @@ static int diskstats_show(struct seq_file *seqf, void *v) cpu = part_stat_lock(); part_round_stats(cpu, hd); part_stat_unlock(); - seq_printf(seqf, "%4d %7d %s %lu %lu %llu " - "%u %lu %lu %llu %u %u %u %u\n", + seq_printf(seqf, "%4d %7d %s %lu %lu %lu " + "%u %lu %lu %lu %u %u %u %u\n", MAJOR(part_devt(hd)), MINOR(part_devt(hd)), disk_name(gp, hd->partno, buf), part_stat_read(hd, ios[READ]), part_stat_read(hd, merges[READ]), - (unsigned long long)part_stat_read(hd, sectors[READ]), + part_stat_read(hd, sectors[READ]), jiffies_to_msecs(part_stat_read(hd, ticks[READ])), part_stat_read(hd, ios[WRITE]), part_stat_read(hd, merges[WRITE]), - (unsigned long long)part_stat_read(hd, sectors[WRITE]), + part_stat_read(hd, sectors[WRITE]), jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])), part_in_flight(hd), jiffies_to_msecs(part_stat_read(hd, io_ticks)), -- cgit v1.1 From debc3b778508f59696ff188f0feca271dcbfa7d9 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Tue, 2 Aug 2011 00:01:18 -0500 Subject: PCI: export pcie_bus_configure_settings symbol pcie_bus_configure_settings needs to be exported if the PCI hotplug driver is being compiled as a module. Reported-by: Stephen Rothwell Signed-off-by: Jon Mason Signed-off-by: Jesse Barnes --- drivers/pci/probe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 5becf7c..8473727 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1470,6 +1470,7 @@ void pcie_bus_configure_settings(struct pci_bus *bus, u8 mpss) pcie_bus_configure_set(bus->self, &smpss); pci_walk_bus(bus, pcie_bus_configure_set, &smpss); } +EXPORT_SYMBOL_GPL(pcie_bus_configure_settings); unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) { -- cgit v1.1 From ad75b88ac3792ae6a541d9b9fa84e379bd0b29dd Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 3 Aug 2011 12:33:20 +0900 Subject: serial: sh-sci: Fix up default regtype probing. Presently the default regtype probing inadvertently bails out due to an inverted error check. This fixes it up, and gets platforms without explicit regtype specifications working again. Reported-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/tty/serial/sh-sci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index d0a5623..522f69d 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1889,7 +1889,7 @@ static int __devinit sci_init_single(struct platform_device *dev, if (p->regtype == SCIx_PROBE_REGTYPE) { ret = sci_probe_regmap(p); - if (unlikely(!ret)) + if (unlikely(ret != 0)) return ret; } -- cgit v1.1 From 5beabc7fcd99856084e232b37d3280ce353eaf41 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 2 Aug 2011 09:42:54 +0000 Subject: serial: sh-sci: fix DMA build by including dma-mapping.h Include dma-mapping.h to fix build of the sh-sci driver on SH-Mobile ARM (sh73a0) when CONFIG_SERIAL_SH_SCI_DMA=y: drivers/tty/serial/sh-sci.c: In function 'sci_rx_dma_release': drivers/tty/serial/sh-sci.c:1182:3: error: implicit declaration of function 'dma_free_coherent' drivers/tty/serial/sh-sci.c: In function 'work_fn_tx': drivers/tty/serial/sh-sci.c:1333:2: error: implicit declaration of function 'dma_sync_sg_for_device' drivers/tty/serial/sh-sci.c: In function 'sci_request_dma': drivers/tty/serial/sh-sci.c:1498:3: error: implicit declaration of function 'dma_map_sg' drivers/tty/serial/sh-sci.c:1527:3: error: implicit declaration of function 'dma_alloc_coherent' drivers/tty/serial/sh-sci.c:1527:10: warning: assignment makes pointer from integer without a cast make[3]: *** [drivers/tty/serial/sh-sci.o] Error 1 make[2]: *** [drivers/tty/serial] Error 2 make[1]: *** [drivers/tty] Error 2 make: *** [drivers] Error 2 Signed-off-by: Magnus Damm Tested-by: Simon Horman Signed-off-by: Paul Mundt --- drivers/tty/serial/sh-sci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 522f69d..38a81ae 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include -- cgit v1.1 From c84b51e65ea2f256353c339bd87e991b7e64630f Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Sun, 31 Jul 2011 21:36:35 +0000 Subject: sh: Fix conflicting definitions of ptrace_triggered The extra nmi argument is causing this compile fail: CC arch/sh/kernel/ptrace_32.o arch/sh/kernel/ptrace_32.c:66:6: error: conflicting types for 'ptrace_triggered' arch/sh/include/asm/ptrace.h:126:13: note: previous declaration of 'ptrace_triggered' was here make[3]: *** [arch/sh/kernel/ptrace_32.o] Error 1 Signed-off-by: Paul Gortmaker Signed-off-by: Paul Mundt --- arch/sh/include/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/include/asm/ptrace.h b/arch/sh/include/asm/ptrace.h index b97baf8..2d3679b 100644 --- a/arch/sh/include/asm/ptrace.h +++ b/arch/sh/include/asm/ptrace.h @@ -123,7 +123,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, struct perf_event; struct perf_sample_data; -extern void ptrace_triggered(struct perf_event *bp, int nmi, +extern void ptrace_triggered(struct perf_event *bp, struct perf_sample_data *data, struct pt_regs *regs); #define task_pt_regs(task) \ -- cgit v1.1 From 1ba762209491e2496e58baffa3fd65d661f54404 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 3 Aug 2011 03:47:36 +0000 Subject: serial: sh-sci: console Runtime PM support Add Runtime PM context save/restore support to the SCIF driver. Tested on the AP4EVB console. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/tty/serial/sh-sci.c | 68 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 38a81ae..ffcacee 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -96,6 +96,12 @@ struct sci_port { #endif struct notifier_block freq_transition; + +#ifdef CONFIG_SERIAL_SH_SCI_CONSOLE + unsigned short saved_smr; + unsigned short saved_fcr; + unsigned char saved_brr; +#endif }; /* Function prototypes */ @@ -1634,11 +1640,25 @@ static unsigned int sci_scbrr_calc(unsigned int algo_id, unsigned int bps, return ((freq + 16 * bps) / (32 * bps) - 1); } +static void sci_reset(struct uart_port *port) +{ + unsigned int status; + + do { + status = sci_in(port, SCxSR); + } while (!(status & SCxSR_TEND(port))); + + sci_out(port, SCSCR, 0x00); /* TE=0, RE=0, CKE1=0 */ + + if (port->type != PORT_SCI) + sci_out(port, SCFCR, SCFCR_RFRST | SCFCR_TFRST); +} + static void sci_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old) { struct sci_port *s = to_sci_port(port); - unsigned int status, baud, smr_val, max_baud; + unsigned int baud, smr_val, max_baud; int t = -1; u16 scfcr = 0; @@ -1658,14 +1678,7 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios, sci_port_enable(s); - do { - status = sci_in(port, SCxSR); - } while (!(status & SCxSR_TEND(port))); - - sci_out(port, SCSCR, 0x00); /* TE=0, RE=0, CKE1=0 */ - - if (port->type != PORT_SCI) - sci_out(port, SCFCR, scfcr | SCFCR_RFRST | SCFCR_TFRST); + sci_reset(port); smr_val = sci_in(port, SCSMR) & 3; @@ -2037,7 +2050,8 @@ static int __devinit serial_console_setup(struct console *co, char *options) if (options) uart_parse_options(options, &baud, &parity, &bits, &flow); - /* TODO: disable clock */ + sci_port_disable(sci_port); + return uart_set_options(port, co, baud, parity, bits, flow); } @@ -2080,6 +2094,36 @@ static int __devinit sci_probe_earlyprintk(struct platform_device *pdev) return 0; } +#define uart_console(port) ((port)->cons->index == (port)->line) + +static int sci_runtime_suspend(struct device *dev) +{ + struct sci_port *sci_port = dev_get_drvdata(dev); + struct uart_port *port = &sci_port->port; + + if (uart_console(port)) { + sci_port->saved_smr = sci_in(port, SCSMR); + sci_port->saved_brr = sci_in(port, SCBRR); + sci_port->saved_fcr = sci_in(port, SCFCR); + } + return 0; +} + +static int sci_runtime_resume(struct device *dev) +{ + struct sci_port *sci_port = dev_get_drvdata(dev); + struct uart_port *port = &sci_port->port; + + if (uart_console(port)) { + sci_reset(port); + sci_out(port, SCSMR, sci_port->saved_smr); + sci_out(port, SCBRR, sci_port->saved_brr); + sci_out(port, SCFCR, sci_port->saved_fcr); + sci_out(port, SCSCR, sci_port->cfg->scscr); + } + return 0; +} + #define SCI_CONSOLE (&serial_console) #else @@ -2089,6 +2133,8 @@ static inline int __devinit sci_probe_earlyprintk(struct platform_device *pdev) } #define SCI_CONSOLE NULL +#define sci_runtime_suspend NULL +#define sci_runtime_resume NULL #endif /* CONFIG_SERIAL_SH_SCI_CONSOLE */ @@ -2204,6 +2250,8 @@ static int sci_resume(struct device *dev) } static const struct dev_pm_ops sci_dev_pm_ops = { + .runtime_suspend = sci_runtime_suspend, + .runtime_resume = sci_runtime_resume, .suspend = sci_suspend, .resume = sci_resume, }; -- cgit v1.1 From f41c53a569c4cf0556893ec9cfcf697d069799e1 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 3 Aug 2011 15:02:55 +0200 Subject: block: swim3: fix unterminated of_device_id table of_device_id structures need a NULL terminating entry, add it. Signed-off-by: Axel Lin Signed-off-by: Jens Axboe --- drivers/block/swim3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 773bfa7..ae3e167 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1184,6 +1184,7 @@ static struct of_device_id swim3_match[] = { .compatible = "swim3" }, + { /* end of list */ } }; static struct macio_driver swim3_driver = -- cgit v1.1 From 88c9e42196285a7c573e2abda11a4b5037c669bc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 2 Aug 2011 09:57:35 +0200 Subject: nfs: add missing prefetch.h include Fix this compile error on s390: CC [M] fs/nfs/blocklayout/blocklayout.o fs/nfs/blocklayout/blocklayout.c: In function 'bl_end_io_read': fs/nfs/blocklayout/blocklayout.c:201:4: error: implicit declaration of function 'prefetchw' Introduced with 9549ec01 "pnfsblock: bl_read_pagelist". Cc: Fred Isaman Signed-off-by: Heiko Carstens Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index e56564d..9561c8f 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -36,6 +36,7 @@ #include #include /* struct bio */ #include /* various write calls */ +#include #include "blocklayout.h" -- cgit v1.1 From 20618b21da0796115e81906d24ff1601552701b7 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 3 Aug 2011 21:54:33 -0700 Subject: pnfs-obj: Bug when we are running out of bio When we have a situation that the number of pages we want to encode is bigger then the size of the bio. (Which can currently happen only when all IO is going to a single device .e.g group_width==1) then the IO is submitted short and we report back only the amount of bytes we actually wrote/read and all is fine. BUT ... There was a bug that the current length counter was advanced before the fail to add the extra page, and we come to a situation that the CDB length was one-page longer then the actual bio size, which is of course rejected by the osd-target. While here also fix the bio size calculation, in the case that we received more then one group of devices. CC: Stable Tree Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objio_osd.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 9383ca7..aa8663a 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -589,22 +589,19 @@ static void _calc_stripe_info(struct objio_state *ios, u64 file_offset, } static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, - unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len, + unsigned pgbase, struct _objio_per_comp *per_dev, int len, gfp_t gfp_flags) { unsigned pg = *cur_pg; + int cur_len = len; struct request_queue *q = osd_request_queue(_io_od(ios, per_dev->dev)); - per_dev->length += cur_len; - if (per_dev->bio == NULL) { - unsigned stripes = ios->layout->num_comps / - ios->layout->mirrors_p1; - unsigned pages_in_stripe = stripes * + unsigned pages_in_stripe = ios->layout->group_width * (ios->layout->stripe_unit / PAGE_SIZE); unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / - stripes; + ios->layout->group_width; if (BIO_MAX_PAGES_KMALLOC < bio_size) bio_size = BIO_MAX_PAGES_KMALLOC; @@ -632,6 +629,7 @@ static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, } BUG_ON(cur_len); + per_dev->length += len; *cur_pg = pg; return 0; } -- cgit v1.1 From 9af7db3228acc286c50e3a0f054ec982efdbc6c6 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 3 Aug 2011 21:52:51 -0700 Subject: pnfs-obj: Fix the comp_index != 0 case There were bugs in the case of partial layout where olo_comp_index is not zero. This used to work and was tested but one of the later cleanup SQUASHMEs broke it and was not tested since. Also add a dprint that specify those received layout parameters. Everything else was already printed. [Needed in v3.0] CC: Stable Tree Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objio_osd.c | 16 +++++++--------- fs/nfs/objlayout/pnfs_osd_xdr_cli.c | 3 +++ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index aa8663a..d0cda12 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -479,7 +479,6 @@ static int _io_check(struct objio_state *ios, bool is_write) for (i = 0; i < ios->numdevs; i++) { struct osd_sense_info osi; struct osd_request *or = ios->per_dev[i].or; - unsigned dev; int ret; if (!or) @@ -500,9 +499,8 @@ static int _io_check(struct objio_state *ios, bool is_write) continue; /* we recovered */ } - dev = ios->per_dev[i].dev; - objlayout_io_set_result(&ios->ol_state, dev, - &ios->layout->comps[dev].oc_object_id, + objlayout_io_set_result(&ios->ol_state, i, + &ios->layout->comps[i].oc_object_id, osd_pri_2_pnfs_err(osi.osd_err_pri), ios->per_dev[i].offset, ios->per_dev[i].length, @@ -648,7 +646,7 @@ static int _prepare_one_group(struct objio_state *ios, u64 length, int ret = 0; while (length) { - struct _objio_per_comp *per_dev = &ios->per_dev[dev]; + struct _objio_per_comp *per_dev = &ios->per_dev[dev - first_dev]; unsigned cur_len, page_off = 0; if (!per_dev->length) { @@ -668,8 +666,8 @@ static int _prepare_one_group(struct objio_state *ios, u64 length, cur_len = stripe_unit; } - if (max_comp < dev) - max_comp = dev; + if (max_comp < dev - first_dev) + max_comp = dev - first_dev; } else { cur_len = stripe_unit; } @@ -804,7 +802,7 @@ static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; unsigned dev = per_dev->dev; struct pnfs_osd_object_cred *cred = - &ios->layout->comps[dev]; + &ios->layout->comps[cur_comp]; struct osd_obj_id obj = { .partition = cred->oc_object_id.oid_partition_id, .id = cred->oc_object_id.oid_object_id, @@ -902,7 +900,7 @@ static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) for (; cur_comp < last_comp; ++cur_comp, ++dev) { struct osd_request *or = NULL; struct pnfs_osd_object_cred *cred = - &ios->layout->comps[dev]; + &ios->layout->comps[cur_comp]; struct osd_obj_id obj = { .partition = cred->oc_object_id.oid_partition_id, .id = cred->oc_object_id.oid_object_id, diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c index 16fc758..b3918f7 100644 --- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c +++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c @@ -170,6 +170,9 @@ int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, p = _osd_xdr_decode_data_map(p, &layout->olo_map); layout->olo_comps_index = be32_to_cpup(p++); layout->olo_num_comps = be32_to_cpup(p++); + dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__, + layout->olo_comps_index, layout->olo_num_comps); + iter->total_comps = layout->olo_num_comps; return 0; } -- cgit v1.1 From 55a673990ec04cf63005318bcf08c2b0046e5778 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 2 Aug 2011 14:46:29 -0400 Subject: NFSv4.1: Fix the callback 'highest_used_slotid' behaviour Currently, there is no guarantee that we will call nfs4_cb_take_slot() even though nfs4_callback_compound() will consistently call nfs4_cb_free_slot() provided the cb_process_state has set the 'clp' field. The result is that we can trigger the BUG_ON() upon the next call to nfs4_cb_take_slot(). This patch fixes the above problem by using the slot id that was taken in the CB_SEQUENCE operation as a flag for whether or not we need to call nfs4_cb_free_slot(). It also fixes an atomicity problem: we need to set tbl->highest_used_slotid atomically with the check for NFS4_SESSION_DRAINING, otherwise we end up racing with the various tests in nfs4_begin_drain_session(). Cc: stable@kernel.org [2.6.38+] Signed-off-by: Trond Myklebust --- fs/nfs/callback.h | 2 +- fs/nfs/callback_proc.c | 20 ++++++++++++++------ fs/nfs/callback_xdr.c | 24 +++++++----------------- 3 files changed, 22 insertions(+), 24 deletions(-) diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index b257383..07df5f1 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -38,6 +38,7 @@ enum nfs4_callback_opnum { struct cb_process_state { __be32 drc_status; struct nfs_client *clp; + int slotid; }; struct cb_compound_hdr_arg { @@ -166,7 +167,6 @@ extern unsigned nfs4_callback_layoutrecall( void *dummy, struct cb_process_state *cps); extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); -extern void nfs4_cb_take_slot(struct nfs_client *clp); struct cb_devicenotifyitem { uint32_t cbd_notify_type; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 74780f9..0ab8202 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -348,7 +348,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) /* Normal */ if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { slot->seq_nr++; - return htonl(NFS4_OK); + goto out_ok; } /* Replay */ @@ -367,11 +367,14 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) /* Wraparound */ if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { slot->seq_nr = 1; - return htonl(NFS4_OK); + goto out_ok; } /* Misordered request */ return htonl(NFS4ERR_SEQ_MISORDERED); +out_ok: + tbl->highest_used_slotid = args->csa_slotid; + return htonl(NFS4_OK); } /* @@ -433,26 +436,32 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, struct cb_sequenceres *res, struct cb_process_state *cps) { + struct nfs4_slot_table *tbl; struct nfs_client *clp; int i; __be32 status = htonl(NFS4ERR_BADSESSION); - cps->clp = NULL; - clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); if (clp == NULL) goto out; + tbl = &clp->cl_session->bc_slot_table; + + spin_lock(&tbl->slot_tbl_lock); /* state manager is resetting the session */ if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { - status = NFS4ERR_DELAY; + spin_unlock(&tbl->slot_tbl_lock); + status = htonl(NFS4ERR_DELAY); goto out; } status = validate_seqid(&clp->cl_session->bc_slot_table, args); + spin_unlock(&tbl->slot_tbl_lock); if (status) goto out; + cps->slotid = args->csa_slotid; + /* * Check for pending referring calls. If a match is found, a * related callback was received before the response to the original @@ -469,7 +478,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, res->csr_slotid = args->csa_slotid; res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; - nfs4_cb_take_slot(clp); out: cps->clp = clp; /* put in nfs4_callback_compound */ diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index c6c86a7..918ad64 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -754,26 +754,15 @@ static void nfs4_callback_free_slot(struct nfs4_session *session) * Let the state manager know callback processing done. * A single slot, so highest used slotid is either 0 or -1 */ - tbl->highest_used_slotid--; + tbl->highest_used_slotid = -1; nfs4_check_drain_bc_complete(session); spin_unlock(&tbl->slot_tbl_lock); } -static void nfs4_cb_free_slot(struct nfs_client *clp) +static void nfs4_cb_free_slot(struct cb_process_state *cps) { - if (clp && clp->cl_session) - nfs4_callback_free_slot(clp->cl_session); -} - -/* A single slot, so highest used slotid is either 0 or -1 */ -void nfs4_cb_take_slot(struct nfs_client *clp) -{ - struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table; - - spin_lock(&tbl->slot_tbl_lock); - tbl->highest_used_slotid++; - BUG_ON(tbl->highest_used_slotid != 0); - spin_unlock(&tbl->slot_tbl_lock); + if (cps->slotid != -1) + nfs4_callback_free_slot(cps->clp->cl_session); } #else /* CONFIG_NFS_V4_1 */ @@ -784,7 +773,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) return htonl(NFS4ERR_MINOR_VERS_MISMATCH); } -static void nfs4_cb_free_slot(struct nfs_client *clp) +static void nfs4_cb_free_slot(struct cb_process_state *cps) { } #endif /* CONFIG_NFS_V4_1 */ @@ -866,6 +855,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r struct cb_process_state cps = { .drc_status = 0, .clp = NULL, + .slotid = -1, }; unsigned int nops = 0; @@ -906,7 +896,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r *hdr_res.status = status; *hdr_res.nops = htonl(nops); - nfs4_cb_free_slot(cps.clp); + nfs4_cb_free_slot(&cps); nfs_put_client(cps.clp); dprintk("%s: done, status = %u\n", __func__, ntohl(status)); return rpc_success; -- cgit v1.1 From 910ac68a2b80c7de95bc8488734067b1bb15d583 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 2 Aug 2011 14:46:52 -0400 Subject: NFSv4.1: Return NFS4ERR_BADSESSION to callbacks during session resets If the client is in the process of resetting the session when it receives a callback, then returning NFS4ERR_DELAY may cause a deadlock with the DESTROY_SESSION call. Basically, if the client returns NFS4ERR_DELAY in response to the CB_SEQUENCE call, then the server is entitled to believe that the client is busy because it is already processing that call. In that case, the server is perfectly entitled to respond with a NFS4ERR_BACK_CHAN_BUSY to any DESTROY_SESSION call. Fix this by having the client reply with a NFS4ERR_BADSESSION in response to the callback if it is resetting the session. Cc: stable@kernel.org [2.6.38+] Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 0ab8202..43926ad 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -452,6 +452,11 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { spin_unlock(&tbl->slot_tbl_lock); status = htonl(NFS4ERR_DELAY); + /* Return NFS4ERR_BADSESSION if we're draining the session + * in order to reset it. + */ + if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) + status = htonl(NFS4ERR_BADSESSION); goto out; } -- cgit v1.1 From 18b08c55a9b04c8783420fb6657599ad724459cc Mon Sep 17 00:00:00 2001 From: Deepak Saxena Date: Thu, 4 Aug 2011 23:39:58 -0700 Subject: Input: remove CLOCK_TICK_RATE from analog joystick driver The analog joystick driver is written for x86 systems. This patch updates it to use the PIT_TICK_RATE value instead of CLOCK_TICK_RATE as they are equivalent on x86 and we want to deprecate the latter. Signed-off-by: Deepak Saxena Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/analog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c index 4afe0a3..c021317 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -139,7 +139,7 @@ struct analog_port { #include #define GET_TIME(x) do { if (cpu_has_tsc) rdtscl(x); else x = get_time_pit(); } while (0) -#define DELTA(x,y) (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? CLOCK_TICK_RATE / HZ : 0))) +#define DELTA(x,y) (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0))) #define TIME_NAME (cpu_has_tsc?"TSC":"PIT") static unsigned int get_time_pit(void) { -- cgit v1.1 From 35ae66e0a09ab70ed588e65f26b4c725cd1656b6 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Fri, 5 Aug 2011 09:37:10 +0200 Subject: block: Make rq_affinity = 1 work as expected Commit 5757a6d76c introduced a new rq_affinity = 2 so as to make the request completed in the __make_request cpu. But it makes the old rq_affinity = 1 not work any more. The root cause is that if the 'cpu' and 'req->cpu' is in the same group and cpu != req->cpu, ccpu will be the same as group_cpu, so the completion will be excuted in the 'cpu' not 'group_cpu'. This patch fix problem by simpling removing group_cpu and the codes are more explicit now. If ccpu == cpu, we complete in cpu, otherwise we raise_blk_irq to ccpu. Cc: Christoph Hellwig Cc: Roland Dreier Cc: Dan Williams Cc: Jens Axboe Signed-off-by: Tao Ma Reviewed-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-softirq.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 475fab8..487addc 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -103,7 +103,7 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = { void __blk_complete_request(struct request *req) { - int ccpu, cpu, group_cpu = NR_CPUS; + int ccpu, cpu; struct request_queue *q = req->q; unsigned long flags; @@ -117,14 +117,12 @@ void __blk_complete_request(struct request *req) */ if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) { ccpu = req->cpu; - if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) { + if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) ccpu = blk_cpu_to_group(ccpu); - group_cpu = blk_cpu_to_group(cpu); - } } else ccpu = cpu; - if (ccpu == cpu || ccpu == group_cpu) { + if (ccpu == cpu) { struct list_head *list; do_local: list = &__get_cpu_var(blk_cpu_done); -- cgit v1.1 From 4931402a9dd00b2997e95bfbb89409b2a6dbb383 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 5 Aug 2011 09:42:20 +0200 Subject: cfq-iosched: Add documentation about idling There are always questions about why CFQ is idling on various conditions. Recent ones is Christoph asking again why to idle on REQ_NOIDLE. His assertion is that XFS is relying more and more on workqueues and is concerned that CFQ idling on IO from every workqueue will impact XFS badly. So he suggested that I add some more documentation about CFQ idling and that can provide more clarity on the topic and also gives an opprotunity to poke a hole in theory and lead to improvements. So here is my attempt at that. Any comments are welcome. Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- Documentation/block/cfq-iosched.txt | 71 +++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/Documentation/block/cfq-iosched.txt b/Documentation/block/cfq-iosched.txt index e578fee..6d670f5 100644 --- a/Documentation/block/cfq-iosched.txt +++ b/Documentation/block/cfq-iosched.txt @@ -43,3 +43,74 @@ If one sets slice_idle=0 and if storage supports NCQ, CFQ internally switches to IOPS mode and starts providing fairness in terms of number of requests dispatched. Note that this mode switching takes effect only for group scheduling. For non-cgroup users nothing should change. + +CFQ IO scheduler Idling Theory +=============================== +Idling on a queue is primarily about waiting for the next request to come +on same queue after completion of a request. In this process CFQ will not +dispatch requests from other cfq queues even if requests are pending there. + +The rationale behind idling is that it can cut down on number of seeks +on rotational media. For example, if a process is doing dependent +sequential reads (next read will come on only after completion of previous +one), then not dispatching request from other queue should help as we +did not move the disk head and kept on dispatching sequential IO from +one queue. + +CFQ has following service trees and various queues are put on these trees. + + sync-idle sync-noidle async + +All cfq queues doing synchronous sequential IO go on to sync-idle tree. +On this tree we idle on each queue individually. + +All synchronous non-sequential queues go on sync-noidle tree. Also any +request which are marked with REQ_NOIDLE go on this service tree. On this +tree we do not idle on individual queues instead idle on the whole group +of queues or the tree. So if there are 4 queues waiting for IO to dispatch +we will idle only once last queue has dispatched the IO and there is +no more IO on this service tree. + +All async writes go on async service tree. There is no idling on async +queues. + +CFQ has some optimizations for SSDs and if it detects a non-rotational +media which can support higher queue depth (multiple requests at in +flight at a time), then it cuts down on idling of individual queues and +all the queues move to sync-noidle tree and only tree idle remains. This +tree idling provides isolation with buffered write queues on async tree. + +FAQ +=== +Q1. Why to idle at all on queues marked with REQ_NOIDLE. + +A1. We only do tree idle (all queues on sync-noidle tree) on queues marked + with REQ_NOIDLE. This helps in providing isolation with all the sync-idle + queues. Otherwise in presence of many sequential readers, other + synchronous IO might not get fair share of disk. + + For example, if there are 10 sequential readers doing IO and they get + 100ms each. If a REQ_NOIDLE request comes in, it will be scheduled + roughly after 1 second. If after completion of REQ_NOIDLE request we + do not idle, and after a couple of milli seconds a another REQ_NOIDLE + request comes in, again it will be scheduled after 1second. Repeat it + and notice how a workload can lose its disk share and suffer due to + multiple sequential readers. + + fsync can generate dependent IO where bunch of data is written in the + context of fsync, and later some journaling data is written. Journaling + data comes in only after fsync has finished its IO (atleast for ext4 + that seemed to be the case). Now if one decides not to idle on fsync + thread due to REQ_NOIDLE, then next journaling write will not get + scheduled for another second. A process doing small fsync, will suffer + badly in presence of multiple sequential readers. + + Hence doing tree idling on threads using REQ_NOIDLE flag on requests + provides isolation from multiple sequential readers and at the same + time we do not idle on individual threads. + +Q2. When to specify REQ_NOIDLE +A2. I would think whenever one is doing synchronous write and not expecting + more writes to be dispatched from same context soon, should be able + to specify REQ_NOIDLE on writes and that probably should work well for + most of the cases. -- cgit v1.1 From 2ab1ba68aeaecd41c4b34f0eaf1d70a37367fb1a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 4 Aug 2011 14:28:36 -0400 Subject: Btrfs: force unplugs when switching from high to regular priority bios Btrfs does bio submissions from a worker thread, and each device has a list of high priority bios and regular priority bios. Synchronous writes go to the high priority thread while async writes go to regular list. This commit brings back an explicit unplug any time we switch from high to regular priority, which makes it easier for the block layer to give us low latencies. Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 53875ae73..3c5f2fc 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -142,6 +142,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) unsigned long limit; unsigned long last_waited = 0; int force_reg = 0; + int sync_pending; struct blk_plug plug; /* @@ -229,6 +230,22 @@ loop_lock: BUG_ON(atomic_read(&cur->bi_cnt) == 0); + /* + * if we're doing the sync list, record that our + * plug has some sync requests on it + * + * If we're doing the regular list and there are + * sync requests sitting around, unplug before + * we add more + */ + if (pending_bios == &device->pending_sync_bios) { + sync_pending = 1; + } else if (sync_pending) { + blk_finish_plug(&plug); + blk_start_plug(&plug); + sync_pending = 0; + } + submit_bio(cur->bi_rw, cur); num_run++; batch_run++; -- cgit v1.1 From a3ea14df0e383f44dcb2e61badb71180dbffe526 Mon Sep 17 00:00:00 2001 From: Paul Fox Date: Tue, 26 Jul 2011 16:42:26 +0100 Subject: x86, olpc: Wait for last byte of EC command to be accepted When executing EC commands, only waiting when there are still more bytes to write is usually fine. However, if the system suspends very quickly after a call to olpc_ec_cmd(), the last data byte may not yet be transferred to the EC, and the command will not complete. This solves a bug where the SCI wakeup mask was not correctly written when going into suspend. It means that sometimes, on XO-1.5 (but not XO-1), the devices that were marked as wakeup sources can't wake up the system. e.g. you ask for wifi wakeups, suspend, but then incoming wifi frames don't wake up the system as they should. Signed-off-by: Paul Fox Signed-off-by: Daniel Drake Acked-by: Andres Salomon Cc: Signed-off-by: Ingo Molnar --- arch/x86/platform/olpc/olpc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c index 8b9940e..7cce722 100644 --- a/arch/x86/platform/olpc/olpc.c +++ b/arch/x86/platform/olpc/olpc.c @@ -161,13 +161,13 @@ restart: if (inbuf && inlen) { /* write data to EC */ for (i = 0; i < inlen; i++) { + pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]); + outb(inbuf[i], 0x68); if (wait_on_ibf(0x6c, 0)) { printk(KERN_ERR "olpc-ec: timeout waiting for" " EC accept data!\n"); goto err; } - pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]); - outb(inbuf[i], 0x68); } } if (outbuf && outlen) { -- cgit v1.1 From 05e33fc20ea5e493a2a1e7f1d04f43cdf89f83ed Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Fri, 5 Aug 2011 09:09:00 -0500 Subject: x86, UV: Remove UV delay in starting slave cpus Delete the 10 msec delay between the INIT and SIPI when starting slave cpus. I can find no requirement for this delay. BIOS also has similar code sequences without the delay. Removing the delay reduces boot time by 40 sec. Every bit helps. Signed-off-by: Jack Steiner Cc: Link: http://lkml.kernel.org/r/20110805140900.GA6774@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index adc66c3..34b1859 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -207,7 +207,6 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | APIC_DM_INIT; uv_write_global_mmr64(pnode, UVH_IPI_INT, val); - mdelay(10); val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | -- cgit v1.1 From c66d3fcbf306af3c0c4b6f4e0d81467f89c67702 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 8 Aug 2011 16:30:11 +0900 Subject: sh: Fix up fallout from cpuidle changes. Fixes up the pm_idle redefinition that was introduced with the earlier cpuidle changes. Signed-off-by: Paul Mundt --- arch/sh/kernel/idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index 32114e0..db4ecd7 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -22,7 +22,7 @@ #include #include -static void (*pm_idle)(void); +void (*pm_idle)(void); static int hlt_counter; -- cgit v1.1 From b3623080ff6974e696710b6c6eb4cdbf2bbab347 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 3 Aug 2011 06:08:54 +0000 Subject: mmc: sdhi, mmcif: zboot: Correct clock disable logic This corrects a logic-error that I made in the original implementation. An alternate patch would be to just remove these lines and leave the clock running as it is reconfigured later on during boot anyway. Signed-off-by: Simon Horman Signed-off-by: Paul Mundt --- arch/arm/boot/compressed/mmcif-sh7372.c | 2 +- arch/arm/boot/compressed/sdhi-sh7372.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/compressed/mmcif-sh7372.c b/arch/arm/boot/compressed/mmcif-sh7372.c index b6f61d9..672ae95 100644 --- a/arch/arm/boot/compressed/mmcif-sh7372.c +++ b/arch/arm/boot/compressed/mmcif-sh7372.c @@ -82,7 +82,7 @@ asmlinkage void mmc_loader(unsigned char *buf, unsigned long len) /* Disable clock to MMC hardware block */ - __raw_writel(__raw_readl(SMSTPCR3) & (1 << 12), SMSTPCR3); + __raw_writel(__raw_readl(SMSTPCR3) | (1 << 12), SMSTPCR3); mmc_update_progress(MMC_PROGRESS_DONE); } diff --git a/arch/arm/boot/compressed/sdhi-sh7372.c b/arch/arm/boot/compressed/sdhi-sh7372.c index d403a8b..d279294 100644 --- a/arch/arm/boot/compressed/sdhi-sh7372.c +++ b/arch/arm/boot/compressed/sdhi-sh7372.c @@ -85,7 +85,7 @@ asmlinkage void mmc_loader(unsigned short *buf, unsigned long len) goto err; /* Disable clock to SDHI1 hardware block */ - __raw_writel(__raw_readl(SMSTPCR3) & (1 << 13), SMSTPCR3); + __raw_writel(__raw_readl(SMSTPCR3) | (1 << 13), SMSTPCR3); mmc_update_progress(MMC_PROGRESS_DONE); -- cgit v1.1 From fe43756143c4efa6f8fdef07aa3ca4dc0be24ada Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 17 Jun 2011 08:21:21 +0000 Subject: ARM: mach-shmobile: mackerel: Add USB-DMA ID This patch use channel0 as Tx, and channel1 as Rx Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-mackerel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index d41c01f..e1d9b6a 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -641,6 +641,8 @@ static struct usbhs_private usbhs0_private = { }, .driver_param = { .buswait_bwait = 4, + .d0_tx_id = SHDMA_SLAVE_USB0_TX, + .d1_rx_id = SHDMA_SLAVE_USB0_RX, }, }, }; @@ -810,6 +812,8 @@ static struct usbhs_private usbhs1_private = { .buswait_bwait = 4, .pipe_type = usbhs1_pipe_cfg, .pipe_size = ARRAY_SIZE(usbhs1_pipe_cfg), + .d0_tx_id = SHDMA_SLAVE_USB1_TX, + .d1_rx_id = SHDMA_SLAVE_USB1_RX, }, }, }; -- cgit v1.1 From 37fb3a30b46237f23cfdf7ee09d49f9888dd13bf Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 8 Aug 2011 16:08:08 +0200 Subject: fuse: fix flock Commit a9ff4f87 "fuse: support BSD locking semantics" overlooked a number of issues with supporing flock locks over existing POSIX locking infrastructure: - it's not backward compatible, passing flock(2) calls to userspace unconditionally (if userspace sets FUSE_POSIX_LOCKS) - it doesn't cater for the fact that flock locks are automatically unlocked on file release - it doesn't take into account the fact that flock exclusive locks (write locks) don't need an fd opened for write. The last one invalidates the original premise of the patch that flock locks can be emulated with POSIX locks. This patch fixes the first two issues. The last one needs to be fixed in userspace if the filesystem assumed that a write lock will happen only on a file operned for write (as in the case of the current fuse library). Reported-by: Sebastian Pipping Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 11 ++++++++++- fs/fuse/fuse_i.h | 8 +++++++- fs/fuse/inode.c | 8 +++++++- include/linux/fuse.h | 9 ++++++++- 4 files changed, 32 insertions(+), 4 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 82a6646..e327849 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -245,6 +245,12 @@ void fuse_release_common(struct file *file, int opcode) req = ff->reserved_req; fuse_prepare_release(ff, file->f_flags, opcode); + if (ff->flock) { + struct fuse_release_in *inarg = &req->misc.release.in; + inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK; + inarg->lock_owner = fuse_lock_owner_id(ff->fc, + (fl_owner_t) file); + } /* Hold vfsmount and dentry until release is finished */ path_get(&file->f_path); req->misc.release.path = file->f_path; @@ -1547,11 +1553,14 @@ static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl) struct fuse_conn *fc = get_fuse_conn(inode); int err; - if (fc->no_lock) { + if (fc->no_flock) { err = flock_lock_file_wait(file, fl); } else { + struct fuse_file *ff = file->private_data; + /* emulate flock with POSIX locks */ fl->fl_owner = (fl_owner_t) file; + ff->flock = true; err = fuse_setlk(file, fl, 1); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index b788bec..eb8c613 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -135,6 +135,9 @@ struct fuse_file { /** Wait queue head for poll */ wait_queue_head_t poll_wait; + + /** Has flock been performed on this file? */ + bool flock:1; }; /** One input argument of a request */ @@ -448,7 +451,7 @@ struct fuse_conn { /** Is removexattr not implemented by fs? */ unsigned no_removexattr:1; - /** Are file locking primitives not implemented by fs? */ + /** Are posix file locking primitives not implemented by fs? */ unsigned no_lock:1; /** Is access not implemented by fs? */ @@ -472,6 +475,9 @@ struct fuse_conn { /** Don't apply umask to creation modes */ unsigned dont_mask:1; + /** Are BSD file locking primitives not implemented by fs? */ + unsigned no_flock:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 5354906e..f541d63 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -809,6 +809,10 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->async_read = 1; if (!(arg->flags & FUSE_POSIX_LOCKS)) fc->no_lock = 1; + if (arg->minor >= 17) { + if (!(arg->flags & FUSE_FLOCK_LOCKS)) + fc->no_flock = 1; + } if (arg->flags & FUSE_ATOMIC_O_TRUNC) fc->atomic_o_trunc = 1; if (arg->minor >= 9) { @@ -823,6 +827,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; + fc->no_flock = 1; } fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); @@ -843,7 +848,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->minor = FUSE_KERNEL_MINOR_VERSION; arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | - FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK; + FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | + FUSE_FLOCK_LOCKS; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/linux/fuse.h b/include/linux/fuse.h index d464de5..464cff5 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -47,6 +47,9 @@ * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct * fuse_ioctl_iovec' instead of ambiguous 'struct iovec' * - add FUSE_IOCTL_32BIT flag + * + * 7.17 + * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK */ #ifndef _LINUX_FUSE_H @@ -78,7 +81,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 16 +#define FUSE_KERNEL_MINOR_VERSION 17 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -153,8 +156,10 @@ struct fuse_file_lock { /** * INIT request/reply flags * + * FUSE_POSIX_LOCKS: remote locking for POSIX file locks * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." * FUSE_DONT_MASK: don't apply umask to file mode on create operations + * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -163,6 +168,7 @@ struct fuse_file_lock { #define FUSE_EXPORT_SUPPORT (1 << 4) #define FUSE_BIG_WRITES (1 << 5) #define FUSE_DONT_MASK (1 << 6) +#define FUSE_FLOCK_LOCKS (1 << 10) /** * CUSE INIT request/reply flags @@ -175,6 +181,7 @@ struct fuse_file_lock { * Release flags */ #define FUSE_RELEASE_FLUSH (1 << 0) +#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1) /** * Getattr flags -- cgit v1.1 From b40cdd56dfa065c0832905e266b39f79419e6914 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 25 Jul 2011 22:35:34 +0200 Subject: fuse: delete dead .write_begin and .write_end aops Ever since 'ea9b990 fuse: implement perform_write', the .write_begin and .write_end aops have been dead code. Their task - acquiring a page from the page cache, sending out a write request and releasing the page again - is now done batch-wise to maximize the number of pages send per userspace request. Signed-off-by: Johannes Weiner Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 70 ---------------------------------------------------------- 1 file changed, 70 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index e327849..ab5b84e 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -749,18 +749,6 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file, return req->misc.write.out.size; } -static int fuse_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) -{ - pgoff_t index = pos >> PAGE_CACHE_SHIFT; - - *pagep = grab_cache_page_write_begin(mapping, index, flags); - if (!*pagep) - return -ENOMEM; - return 0; -} - void fuse_write_update_size(struct inode *inode, loff_t pos) { struct fuse_conn *fc = get_fuse_conn(inode); @@ -773,62 +761,6 @@ void fuse_write_update_size(struct inode *inode, loff_t pos) spin_unlock(&fc->lock); } -static int fuse_buffered_write(struct file *file, struct inode *inode, - loff_t pos, unsigned count, struct page *page) -{ - int err; - size_t nres; - struct fuse_conn *fc = get_fuse_conn(inode); - unsigned offset = pos & (PAGE_CACHE_SIZE - 1); - struct fuse_req *req; - - if (is_bad_inode(inode)) - return -EIO; - - /* - * Make sure writepages on the same page are not mixed up with - * plain writes. - */ - fuse_wait_on_page_writeback(inode, page->index); - - req = fuse_get_req(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - - req->in.argpages = 1; - req->num_pages = 1; - req->pages[0] = page; - req->page_offset = offset; - nres = fuse_send_write(req, file, pos, count, NULL); - err = req->out.h.error; - fuse_put_request(fc, req); - if (!err && !nres) - err = -EIO; - if (!err) { - pos += nres; - fuse_write_update_size(inode, pos); - if (count == PAGE_CACHE_SIZE) - SetPageUptodate(page); - } - fuse_invalidate_attr(inode); - return err ? err : nres; -} - -static int fuse_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) -{ - struct inode *inode = mapping->host; - int res = 0; - - if (copied) - res = fuse_buffered_write(file, inode, pos, copied, page); - - unlock_page(page); - page_cache_release(page); - return res; -} - static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, struct inode *inode, loff_t pos, size_t count) @@ -2181,8 +2113,6 @@ static const struct address_space_operations fuse_file_aops = { .readpage = fuse_readpage, .writepage = fuse_writepage, .launder_page = fuse_launder_page, - .write_begin = fuse_write_begin, - .write_end = fuse_write_end, .readpages = fuse_readpages, .set_page_dirty = __set_page_dirty_nobuffers, .bmap = fuse_bmap, -- cgit v1.1 From 478e0841b3dce3edc2c67bf0fc51af30f582e9e2 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 25 Jul 2011 22:35:35 +0200 Subject: fuse: mark pages accessed when written to As fuse does not use the page cache library functions when userspace writes to a file, it did not benefit from 'c8236db mm: mark page accessed before we write_end()' that made sure pages are properly marked accessed when written to. Signed-off-by: Johannes Weiner Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ab5b84e..7155f49 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -14,6 +14,7 @@ #include #include #include +#include static const struct file_operations fuse_direct_io_file_operations; @@ -834,6 +835,8 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, pagefault_enable(); flush_dcache_page(page); + mark_page_accessed(page); + if (!tmp) { unlock_page(page); page_cache_release(page); -- cgit v1.1 From a88769cde24fcef11219cf99193ee558d1028217 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 27 Jul 2011 10:11:28 -0700 Subject: firmware: fix google/gsmi.c build warning Modify function parameter type to match expected type. Fixes a build warning: drivers/firmware/google/gsmi.c:473: warning: initialization from incompatible pointer type Signed-off-by: Randy Dunlap Cc: Mike Waychison Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/google/gsmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c index 68810fd..aa83de9 100644 --- a/drivers/firmware/google/gsmi.c +++ b/drivers/firmware/google/gsmi.c @@ -420,7 +420,7 @@ static efi_status_t gsmi_get_next_variable(unsigned long *name_size, static efi_status_t gsmi_set_variable(efi_char16_t *name, efi_guid_t *vendor, - unsigned long attr, + u32 attr, unsigned long data_size, void *data) { -- cgit v1.1 From 7de636fa25c19fc4187f85f8325c50b1b21a6d8b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 27 Jul 2011 12:11:25 -0700 Subject: driver core: fix kernel-doc warning in platform.c Warning(drivers/base/platform.c:50): No description found for parameter 'pdev' Warning(drivers/base/platform.c:50): Excess function parameter 'dev' description in 'arch_setup_pdev_archdata' Signed-off-by: Randy Dunlap Cc: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 0cad9c7..99a5272 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL_GPL(platform_bus); /** * arch_setup_pdev_archdata - Allow manipulation of archdata before its used - * @dev: platform device + * @pdev: platform device * * This is called before platform_device_add() such that any pdev_archdata may * be setup before the platform_notifier is called. So if a user needs to -- cgit v1.1 From f9e0b159dbff693bacb64a929e04f442df985b50 Mon Sep 17 00:00:00 2001 From: Arnaud Lacombe Date: Thu, 21 Jul 2011 13:16:19 -0400 Subject: drivers/base/devtmpfs.c: correct annotation of `setup_done' This fixes the following section mismatch issue: WARNING: vmlinux.o(.text+0x1192bf): Section mismatch in reference from the function devtmpfsd() to the variable .init.data:setup_done The function devtmpfsd() references the variable __initdata setup_done. This is often because devtmpfsd lacks a __initdata annotation or the annotation of setup_done is wrong. WARNING: vmlinux.o(.text+0x119342): Section mismatch in reference from the function devtmpfsd() to the variable .init.data:setup_done The function devtmpfsd() references the variable __initdata setup_done. This is often because devtmpfsd lacks a __initdata annotation or the annotation of setup_done is wrong. Signed-off-by: Arnaud Lacombe Signed-off-by: Greg Kroah-Hartman --- drivers/base/devtmpfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 33e1bed..a4760e0 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -376,7 +376,7 @@ int devtmpfs_mount(const char *mntdir) return err; } -static __initdata DECLARE_COMPLETION(setup_done); +static DECLARE_COMPLETION(setup_done); static int handle(const char *name, mode_t mode, struct device *dev) { -- cgit v1.1 From b882fc1b03d46e67ffe06133f4f4532db6e8dd0d Mon Sep 17 00:00:00 2001 From: Kukjin Kim Date: Thu, 28 Jul 2011 08:50:38 +0900 Subject: serial: samsung: Fix build error drivers/tty/serial/samsung.c: In function 's3c24xx_serial_init': drivers/tty/serial/samsung.c:1237: error: lvalue required as unary '&' operand Cc: Greg Kroah-Hartman Signed-off-by: Kukjin Kim Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index afc6294..6edafb5 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -1225,15 +1225,19 @@ static const struct dev_pm_ops s3c24xx_serial_pm_ops = { .suspend = s3c24xx_serial_suspend, .resume = s3c24xx_serial_resume, }; +#define SERIAL_SAMSUNG_PM_OPS (&s3c24xx_serial_pm_ops) + #else /* !CONFIG_PM_SLEEP */ -#define s3c24xx_serial_pm_ops NULL + +#define SERIAL_SAMSUNG_PM_OPS NULL #endif /* CONFIG_PM_SLEEP */ int s3c24xx_serial_init(struct platform_driver *drv, struct s3c24xx_uart_info *info) { dbg("s3c24xx_serial_init(%p,%p)\n", drv, info); - drv->driver.pm = &s3c24xx_serial_pm_ops; + + drv->driver.pm = SERIAL_SAMSUNG_PM_OPS; return platform_driver_register(drv); } -- cgit v1.1 From cd566c64f50e568c0ac3c13bdd15f523631ce845 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 8 Aug 2011 23:39:59 -0700 Subject: Input: mma8450 - fix module device table type The module device table for of_device_id should use "of" type. Signed-off-by: Axel Lin Signed-off-by: Dmitry Torokhov --- drivers/input/misc/mma8450.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/mma8450.c b/drivers/input/misc/mma8450.c index 6c76cf7..0794778 100644 --- a/drivers/input/misc/mma8450.c +++ b/drivers/input/misc/mma8450.c @@ -234,7 +234,7 @@ static const struct of_device_id mma8450_dt_ids[] = { { .compatible = "fsl,mma8450", }, { /* sentinel */ } }; -MODULE_DEVICE_TABLE(i2c, mma8450_dt_ids); +MODULE_DEVICE_TABLE(of, mma8450_dt_ids); static struct i2c_driver mma8450_driver = { .driver = { -- cgit v1.1 From db0b34b07438d92c4c190998c42a502fbf90064e Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Mon, 8 Aug 2011 23:45:14 -0700 Subject: Input: bcm5974 - add support for touchpads found in MacBookAir4,2 Added USB device IDs for MacBookAir4,2 trackpad. Device constants were copied from the MacBookAir3,2 constants. The 4,2 device specification is reportedly unchanged from the 3,2 predecessor and seems to work well. Signed-off-by: Joshua V Dillon Signed-off-by: Chase Douglas Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/bcm5974.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c index 3126983..48d9ec1 100644 --- a/drivers/input/mouse/bcm5974.c +++ b/drivers/input/mouse/bcm5974.c @@ -67,6 +67,10 @@ #define USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI 0x0245 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ISO 0x0246 #define USB_DEVICE_ID_APPLE_WELLSPRING5_JIS 0x0247 +/* MacbookAir4,2 (unibody, July 2011) */ +#define USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI 0x024c +#define USB_DEVICE_ID_APPLE_WELLSPRING6_ISO 0x024d +#define USB_DEVICE_ID_APPLE_WELLSPRING6_JIS 0x024e #define BCM5974_DEVICE(prod) { \ .match_flags = (USB_DEVICE_ID_MATCH_DEVICE | \ @@ -104,6 +108,10 @@ static const struct usb_device_id bcm5974_table[] = { BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_JIS), + /* MacbookAir4,2 */ + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI), + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ISO), + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_JIS), /* Terminating entry */ {} }; @@ -294,6 +302,18 @@ static const struct bcm5974_config bcm5974_config_table[] = { { DIM_X, DIM_X / SN_COORD, -4415, 5050 }, { DIM_Y, DIM_Y / SN_COORD, -55, 6680 } }, + { + USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI, + USB_DEVICE_ID_APPLE_WELLSPRING6_ISO, + USB_DEVICE_ID_APPLE_WELLSPRING6_JIS, + HAS_INTEGRATED_BUTTON, + 0x84, sizeof(struct bt_data), + 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, + { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 }, + { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 }, + { DIM_X, DIM_X / SN_COORD, -4620, 5140 }, + { DIM_Y, DIM_Y / SN_COORD, -150, 6600 } + }, {} }; -- cgit v1.1 From ea5e116162b7e0cf83a2b8a273440514404604de Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 3 Aug 2011 11:12:17 -0400 Subject: xen/blkback: Make description more obvious. With the frontend having Xen but the backend not, it just looks odd: <*> Xen virtual block device support <*> Block-device backend driver Fix it to have the 'Xen' in front of it. Reported-by: Sander Eikelenboom Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 717d6e4..a89ebf1 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -471,7 +471,7 @@ config XEN_BLKDEV_FRONTEND in another domain which drives the actual block device. config XEN_BLKDEV_BACKEND - tristate "Block-device backend driver" + tristate "Xen block-device backend driver" depends on XEN_BACKEND help The block-device backend driver allows the kernel to export its -- cgit v1.1 From fa1bf42ff9296ac4cf211b0a1b450a6071d26a95 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Tue, 9 Aug 2011 20:32:09 +0200 Subject: allow blk_flush_policy to return REQ_FSEQ_DATA independent of *FLUSH blk_insert_flush has the following check: /* * If there's data but flush is not necessary, the request can be * processed directly without going through flush machinery. Queue * for normal execution. */ if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { list_add_tail(&rq->queuelist, &q->queue_head); return; } However, blk_flush_policy will not return with policy set to only REQ_FSEQ_DATA: static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq) { unsigned int policy = 0; if (fflags & REQ_FLUSH) { if (rq->cmd_flags & REQ_FLUSH) policy |= REQ_FSEQ_PREFLUSH; if (blk_rq_sectors(rq)) policy |= REQ_FSEQ_DATA; if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA)) policy |= REQ_FSEQ_POSTFLUSH; } return policy; } Notice that REQ_FSEQ_DATA is only set if REQ_FLUSH is set. Fix this mismatch by moving the setting of REQ_FSEQ_DATA outside of the REQ_FLUSH check. Tejun notes: Hmmm... yes, this can become a correctness issue if (and only if) blk_queue_flush() is called to change q->flush_flags while requests are in-flight; otherwise, requests wouldn't reach the function at all. Also, I think it would be a generally good idea to always set FSEQ_DATA if the request has data. Cheers, Jeff Signed-off-by: Jeff Moyer Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-flush.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/blk-flush.c b/block/blk-flush.c index bb21e4c..2d162bd 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -95,11 +95,12 @@ static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq) { unsigned int policy = 0; + if (blk_rq_sectors(rq)) + policy |= REQ_FSEQ_DATA; + if (fflags & REQ_FLUSH) { if (rq->cmd_flags & REQ_FLUSH) policy |= REQ_FSEQ_PREFLUSH; - if (blk_rq_sectors(rq)) - policy |= REQ_FSEQ_DATA; if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA)) policy |= REQ_FSEQ_POSTFLUSH; } -- cgit v1.1 From 5ac04bf190e6f8b17238aef179ebd7f2bdfec919 Mon Sep 17 00:00:00 2001 From: Andiry Xu Date: Wed, 3 Aug 2011 16:46:48 +0800 Subject: xHCI: fix port U3 status check condition Fix the port U3 status check when Clear PORT_SUSPEND Feature. The port status should be masked with PORT_PLS_MASK to check if it's in U3 state. This should be backported to kernels as old as 2.6.37. Signed-off-by: Andiry Xu Signed-off-by: Sarah Sharp Cc: stable@kernel.org --- drivers/usb/host/xhci-hub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 0be788c..cddcdcca 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -664,7 +664,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, xhci_dbg(xhci, "PORTSC %04x\n", temp); if (temp & PORT_RESET) goto error; - if (temp & XDEV_U3) { + if ((temp & PORT_PLS_MASK) == XDEV_U3) { if ((temp & PORT_PE) == 0) goto error; -- cgit v1.1 From 8a8ff2f9399b23b968901f585ccb5a70a537c5ae Mon Sep 17 00:00:00 2001 From: Andiry Xu Date: Wed, 3 Aug 2011 16:46:49 +0800 Subject: xHCI: report USB2 port in resuming as suspend When a USB2 port initiate a remote wakeup, software shall ensure that resume is signaled for at least 20ms, and then write '0' to the PLS field. According to this, xhci driver do the following things: 1. When receive a remote wakeup event in irq_handler, set the resume_done value as jiffies + 20ms, and modify rh_timer to poll root hub status at that time; 2. When receive a GetPortStatus request, if the jiffies is after the resume_done value, clear the resume signal and resume_done. However, if usb_port_resume() is called before the rh_timer triggered, it will indicate the port as Suspend Cleared and skip the clear resume signal part. The device will fail the usb_get_status request in finish_port_resume(), and usbcore will try a reset-resume instead. Device will work OK after reset-resume, but resume_done value is not cleared in this case, and xhci_bus_suspend() will fail because when it finds a non-zero resume_done value, it will regard the port as resuming and return -EBUSY. This causes issue on some platforms that the system fail to suspend after remote wakeup from suspend by USB2 devices connected to xHCI port. To fix this issue, report the port status as suspend if the resume is signaling less that 20ms, and usb_port_resume() will wait 25ms and check port status again, so xHCI driver can clear the resume signaling and resume_done value. This should be backported to kernels as old as 2.6.37. Signed-off-by: Andiry Xu Signed-off-by: Sarah Sharp Cc: stable@kernel.org --- drivers/usb/host/xhci-hub.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index cddcdcca..1e96d1f 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -463,11 +463,12 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, && (temp & PORT_POWER)) status |= USB_PORT_STAT_SUSPEND; } - if ((temp & PORT_PLS_MASK) == XDEV_RESUME) { + if ((temp & PORT_PLS_MASK) == XDEV_RESUME && + !DEV_SUPERSPEED(temp)) { if ((temp & PORT_RESET) || !(temp & PORT_PE)) goto error; - if (!DEV_SUPERSPEED(temp) && time_after_eq(jiffies, - bus_state->resume_done[wIndex])) { + if (time_after_eq(jiffies, + bus_state->resume_done[wIndex])) { xhci_dbg(xhci, "Resume USB2 port %d\n", wIndex + 1); bus_state->resume_done[wIndex] = 0; @@ -487,6 +488,14 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, xhci_ring_device(xhci, slot_id); bus_state->port_c_suspend |= 1 << wIndex; bus_state->suspended_ports &= ~(1 << wIndex); + } else { + /* + * The resume has been signaling for less than + * 20ms. Report the port status as SUSPEND, + * let the usbcore check port status again + * and clear resume signaling later. + */ + status |= USB_PORT_STAT_SUSPEND; } } if ((temp & PORT_PLS_MASK) == XDEV_U0 -- cgit v1.1 From d13565c12828ce0cd2a3862bf6260164a0653352 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Fri, 22 Jul 2011 14:34:34 -0700 Subject: xhci: Fix memory leak during failed enqueue. When the isochronous transfer support was introduced, and the xHCI driver switched to using urb->hcpriv to store an "urb_priv" pointer, a couple of memory leaks were introduced into the URB enqueue function in its error handling paths. xhci_urb_enqueue allocates urb_priv, but it doesn't free it if changing the control endpoint's max packet size fails or the bulk endpoint is in the middle of allocating or deallocating streams. xhci_urb_enqueue also doesn't free urb_priv if any of the four endpoint types' enqueue functions fail. Instead, it expects those functions to free urb_priv if an error occurs. However, the bulk, control, and interrupt enqueue functions do not free urb_priv if the endpoint ring is NULL. It will, however, get freed if prepare_transfer() fails in those enqueue functions. Several of the error paths in the isochronous endpoint enqueue function also fail to free it. xhci_queue_isoc_tx_prepare() doesn't free urb_priv if prepare_ring() indicates there is not enough room for all the isochronous TDs in this URB. If individual isochronous TDs fail to be queued (perhaps due to an endpoint state change), urb_priv is also leaked. This argues that the freeing of urb_priv should be done in the function that allocated it, xhci_urb_enqueue. This patch looks rather ugly, but refactoring the code will have to wait because this patch needs to be backported to stable kernels. This patch should be backported to kernels as old as 2.6.36. Signed-off-by: Sarah Sharp Cc: Andiry Xu Cc: stable@kernel.org --- drivers/usb/host/xhci-ring.c | 5 +---- drivers/usb/host/xhci.c | 21 +++++++++++++++++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 7113d16..9d3f9dd 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2500,11 +2500,8 @@ static int prepare_transfer(struct xhci_hcd *xhci, if (td_index == 0) { ret = usb_hcd_link_urb_to_ep(bus_to_hcd(urb->dev->bus), urb); - if (unlikely(ret)) { - xhci_urb_free_priv(xhci, urb_priv); - urb->hcpriv = NULL; + if (unlikely(ret)) return ret; - } } td->urb = urb; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 1c4432d..8e84acf 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1085,8 +1085,11 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) if (urb->dev->speed == USB_SPEED_FULL) { ret = xhci_check_maxpacket(xhci, slot_id, ep_index, urb); - if (ret < 0) + if (ret < 0) { + xhci_urb_free_priv(xhci, urb_priv); + urb->hcpriv = NULL; return ret; + } } /* We have a spinlock and interrupts disabled, so we must pass @@ -1097,6 +1100,8 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) goto dying; ret = xhci_queue_ctrl_tx(xhci, GFP_ATOMIC, urb, slot_id, ep_index); + if (ret) + goto free_priv; spin_unlock_irqrestore(&xhci->lock, flags); } else if (usb_endpoint_xfer_bulk(&urb->ep->desc)) { spin_lock_irqsave(&xhci->lock, flags); @@ -1117,6 +1122,8 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) ret = xhci_queue_bulk_tx(xhci, GFP_ATOMIC, urb, slot_id, ep_index); } + if (ret) + goto free_priv; spin_unlock_irqrestore(&xhci->lock, flags); } else if (usb_endpoint_xfer_int(&urb->ep->desc)) { spin_lock_irqsave(&xhci->lock, flags); @@ -1124,6 +1131,8 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) goto dying; ret = xhci_queue_intr_tx(xhci, GFP_ATOMIC, urb, slot_id, ep_index); + if (ret) + goto free_priv; spin_unlock_irqrestore(&xhci->lock, flags); } else { spin_lock_irqsave(&xhci->lock, flags); @@ -1131,18 +1140,22 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) goto dying; ret = xhci_queue_isoc_tx_prepare(xhci, GFP_ATOMIC, urb, slot_id, ep_index); + if (ret) + goto free_priv; spin_unlock_irqrestore(&xhci->lock, flags); } exit: return ret; dying: - xhci_urb_free_priv(xhci, urb_priv); - urb->hcpriv = NULL; xhci_dbg(xhci, "Ep 0x%x: URB %p submitted for " "non-responsive xHCI host.\n", urb->ep->desc.bEndpointAddress, urb); + ret = -ESHUTDOWN; +free_priv: + xhci_urb_free_priv(xhci, urb_priv); + urb->hcpriv = NULL; spin_unlock_irqrestore(&xhci->lock, flags); - return -ESHUTDOWN; + return ret; } /* Get the right ring for the given URB. -- cgit v1.1 From 522989a27c7badb608155b1f1dea3487ed431f74 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Fri, 29 Jul 2011 12:44:32 -0700 Subject: xhci: Fix failed enqueue in the middle of isoch TD. When an isochronous transfer is enqueued, xhci_queue_isoc_tx_prepare() will ensure that there is enough room on the transfer rings for all of the isochronous TDs for that URB. However, when xhci_queue_isoc_tx() is enqueueing individual isoc TDs, the prepare_transfer() function can fail if the endpoint state has changed to disabled, error, or some other unknown state. With the current code, if Nth TD (not the first TD) fails, the ring is left in a sorry state. The partially enqueued TDs are left on the ring, and the first TRB of the TD is not given back to the hardware. The enqueue pointer is left on the TRB after the last successfully enqueued TD. This means the ring is basically useless. Any new transfers will be enqueued after the failed TDs, which the hardware will never read because the cycle bit indicates it does not own them. The ring will fill up with untransferred TDs, and the endpoint will be basically unusable. The untransferred TDs will also remain on the TD list. Since the td_list is a FIFO, this basically means the ring handler will be waiting on TDs that will never be completed (or worse, dereference memory that doesn't exist any more). Change the code to clean up the isochronous ring after a failed transfer. If the first TD failed, simply return and allow the xhci_urb_enqueue function to free the urb_priv. If the Nth TD failed, first remove the TDs from the td_list. Then convert the TRBs that were enqueued into No-op TRBs. Make sure to flip the cycle bit on all enqueued TRBs (including any link TRBs in the middle or between TDs), but leave the cycle bit of the first TRB (which will show software-owned) intact. Then move the ring enqueue pointer back to the first TRB and make sure to change the xhci_ring's cycle state to what is appropriate for that ring segment. This ensures that the No-op TRBs will be overwritten by subsequent TDs, and the hardware will not start executing random TRBs because the cycle bit was left as hardware-owned. This bug is unlikely to be hit, but it was something I noticed while tracking down the watchdog timer issue. I verified that the fix works by injecting some errors on the 250th isochronous URB queued, although I could not verify that the ring is in the correct state because uvcvideo refused to talk to the device after the first usb_submit_urb() failed. Ring debugging shows that the ring looks correct, however. This patch should be backported to kernels as old as 2.6.36. Signed-off-by: Sarah Sharp Cc: Andiry Xu Cc: stable@kernel.org --- drivers/usb/host/xhci-ring.c | 50 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 9d3f9dd..f72149b 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -514,8 +514,12 @@ void xhci_find_new_dequeue_state(struct xhci_hcd *xhci, (unsigned long long) addr); } +/* flip_cycle means flip the cycle bit of all but the first and last TRB. + * (The last TRB actually points to the ring enqueue pointer, which is not part + * of this TD.) This is used to remove partially enqueued isoc TDs from a ring. + */ static void td_to_noop(struct xhci_hcd *xhci, struct xhci_ring *ep_ring, - struct xhci_td *cur_td) + struct xhci_td *cur_td, bool flip_cycle) { struct xhci_segment *cur_seg; union xhci_trb *cur_trb; @@ -528,6 +532,12 @@ static void td_to_noop(struct xhci_hcd *xhci, struct xhci_ring *ep_ring, * leave the pointers intact. */ cur_trb->generic.field[3] &= cpu_to_le32(~TRB_CHAIN); + /* Flip the cycle bit (link TRBs can't be the first + * or last TRB). + */ + if (flip_cycle) + cur_trb->generic.field[3] ^= + cpu_to_le32(TRB_CYCLE); xhci_dbg(xhci, "Cancel (unchain) link TRB\n"); xhci_dbg(xhci, "Address = %p (0x%llx dma); " "in seg %p (0x%llx dma)\n", @@ -541,6 +551,11 @@ static void td_to_noop(struct xhci_hcd *xhci, struct xhci_ring *ep_ring, cur_trb->generic.field[2] = 0; /* Preserve only the cycle bit of this TRB */ cur_trb->generic.field[3] &= cpu_to_le32(TRB_CYCLE); + /* Flip the cycle bit except on the first or last TRB */ + if (flip_cycle && cur_trb != cur_td->first_trb && + cur_trb != cur_td->last_trb) + cur_trb->generic.field[3] ^= + cpu_to_le32(TRB_CYCLE); cur_trb->generic.field[3] |= cpu_to_le32( TRB_TYPE(TRB_TR_NOOP)); xhci_dbg(xhci, "Cancel TRB %p (0x%llx dma) " @@ -719,7 +734,7 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci, cur_td->urb->stream_id, cur_td, &deq_state); else - td_to_noop(xhci, ep_ring, cur_td); + td_to_noop(xhci, ep_ring, cur_td, false); remove_finished_td: /* * The event handler won't see a completion for this TD anymore, @@ -3223,6 +3238,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags, start_trb = &ep_ring->enqueue->generic; start_cycle = ep_ring->cycle_state; + urb_priv = urb->hcpriv; /* Queue the first TRB, even if it's zero-length */ for (i = 0; i < num_tds; i++) { unsigned int total_packet_count; @@ -3246,12 +3262,13 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags, ret = prepare_transfer(xhci, xhci->devs[slot_id], ep_index, urb->stream_id, trbs_per_td, urb, i, mem_flags); - if (ret < 0) - return ret; + if (ret < 0) { + if (i == 0) + return ret; + goto cleanup; + } - urb_priv = urb->hcpriv; td = urb_priv->td[i]; - for (j = 0; j < trbs_per_td; j++) { u32 remainder = 0; field = TRB_TBC(burst_count) | TRB_TLBPC(residue); @@ -3341,6 +3358,27 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags, giveback_first_trb(xhci, slot_id, ep_index, urb->stream_id, start_cycle, start_trb); return 0; +cleanup: + /* Clean up a partially enqueued isoc transfer. */ + + for (i--; i >= 0; i--) + list_del(&urb_priv->td[i]->td_list); + + /* Use the first TD as a temporary variable to turn the TDs we've queued + * into No-ops with a software-owned cycle bit. That way the hardware + * won't accidentally start executing bogus TDs when we partially + * overwrite them. td->first_trb and td->start_seg are already set. + */ + urb_priv->td[0]->last_trb = ep_ring->enqueue; + /* Every TRB except the first & last will have its cycle bit flipped. */ + td_to_noop(xhci, ep_ring, urb_priv->td[0], true); + + /* Reset the ring enqueue back to the first TRB and its cycle bit. */ + ep_ring->enqueue = urb_priv->td[0]->first_trb; + ep_ring->enq_seg = urb_priv->td[0]->start_seg; + ep_ring->cycle_state = start_cycle; + usb_hcd_unlink_urb_from_ep(bus_to_hcd(urb->dev->bus), urb); + return ret; } /* -- cgit v1.1 From 585df1d90cb07a02ca6c7a7d339e56e46d50dafb Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Tue, 2 Aug 2011 15:43:40 -0700 Subject: xhci: Remove TDs from TD lists when URBs are canceled. When a driver tries to cancel an URB, and the host controller is dying, xhci_urb_dequeue will giveback the URB without removing the xhci_tds that comprise that URB from the td_list or the cancelled_td_list. This can cause a race condition between the driver calling URB dequeue and the stop endpoint command watchdog timer. If the timer fires on a dying host, and a driver attempts to resubmit while the watchdog timer has dropped the xhci->lock to giveback a cancelled URB, URBs may be given back by the xhci_urb_dequeue() function. At that point, the URB's priv pointer will be freed and set to NULL, but the TDs will remain on the td_list. This will cause an oops in xhci_giveback_urb_in_irq() when the watchdog timer attempts to loop through the endpoints' td_lists, giving back killed URBs. Make sure that xhci_urb_dequeue() removes TDs from the TD lists and canceled TD lists before it gives back the URB. This patch should be backported to kernels as old as 2.6.36. Signed-off-by: Sarah Sharp Cc: Andiry Xu Cc: stable@kernel.org --- drivers/usb/host/xhci-ring.c | 16 ++++++++-------- drivers/usb/host/xhci.c | 7 +++++++ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index f72149b..b2d654b 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -741,7 +741,7 @@ remove_finished_td: * so remove it from the endpoint ring's TD list. Keep it in * the cancelled TD list for URB completion later. */ - list_del(&cur_td->td_list); + list_del_init(&cur_td->td_list); } last_unlinked_td = cur_td; xhci_stop_watchdog_timer_in_irq(xhci, ep); @@ -769,7 +769,7 @@ remove_finished_td: do { cur_td = list_entry(ep->cancelled_td_list.next, struct xhci_td, cancelled_td_list); - list_del(&cur_td->cancelled_td_list); + list_del_init(&cur_td->cancelled_td_list); /* Clean up the cancelled URB */ /* Doesn't matter what we pass for status, since the core will @@ -877,9 +877,9 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg) cur_td = list_first_entry(&ring->td_list, struct xhci_td, td_list); - list_del(&cur_td->td_list); + list_del_init(&cur_td->td_list); if (!list_empty(&cur_td->cancelled_td_list)) - list_del(&cur_td->cancelled_td_list); + list_del_init(&cur_td->cancelled_td_list); xhci_giveback_urb_in_irq(xhci, cur_td, -ESHUTDOWN, "killed"); } @@ -888,7 +888,7 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg) &temp_ep->cancelled_td_list, struct xhci_td, cancelled_td_list); - list_del(&cur_td->cancelled_td_list); + list_del_init(&cur_td->cancelled_td_list); xhci_giveback_urb_in_irq(xhci, cur_td, -ESHUTDOWN, "killed"); } @@ -1580,10 +1580,10 @@ td_cleanup: else *status = 0; } - list_del(&td->td_list); + list_del_init(&td->td_list); /* Was this TD slated to be cancelled but completed anyway? */ if (!list_empty(&td->cancelled_td_list)) - list_del(&td->cancelled_td_list); + list_del_init(&td->cancelled_td_list); urb_priv->td_cnt++; /* Giveback the urb when all the tds are completed */ @@ -3362,7 +3362,7 @@ cleanup: /* Clean up a partially enqueued isoc transfer. */ for (i--; i >= 0; i--) - list_del(&urb_priv->td[i]->td_list); + list_del_init(&urb_priv->td[i]->td_list); /* Use the first TD as a temporary variable to turn the TDs we've queued * into No-ops with a software-owned cycle bit. That way the hardware diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 8e84acf..3a0f695 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1252,6 +1252,13 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) if (temp == 0xffffffff || (xhci->xhc_state & XHCI_STATE_HALTED)) { xhci_dbg(xhci, "HW died, freeing TD.\n"); urb_priv = urb->hcpriv; + for (i = urb_priv->td_cnt; i < urb_priv->length; i++) { + td = urb_priv->td[i]; + if (!list_empty(&td->td_list)) + list_del_init(&td->td_list); + if (!list_empty(&td->cancelled_td_list)) + list_del_init(&td->cancelled_td_list); + } usb_hcd_unlink_urb_from_ep(hcd, urb); spin_unlock_irqrestore(&xhci->lock, flags); -- cgit v1.1 From 5185352c163a72cf969b2fbbfb89801b398896fd Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 9 Aug 2011 14:48:11 -0700 Subject: libceph: fix msgpool There were several problems here: 1- we weren't tagging allocations with the pool, so they were never returned to the pool. 2- msgpool_put didn't add back to the mempool, even it were called. 3- msgpool_release didn't clear the pool pointer, so it would have looped had #1 not been broken. These may or may not have been responsible for #1136 or #1381 (BUG due to non-empty mempool on umount). I can't seem to trigger the crash now using the method I was using before. Signed-off-by: Sage Weil --- net/ceph/msgpool.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c index d5f2d97..1f4cb30 100644 --- a/net/ceph/msgpool.c +++ b/net/ceph/msgpool.c @@ -7,27 +7,37 @@ #include -static void *alloc_fn(gfp_t gfp_mask, void *arg) +static void *msgpool_alloc(gfp_t gfp_mask, void *arg) { struct ceph_msgpool *pool = arg; - void *p; + struct ceph_msg *msg; - p = ceph_msg_new(0, pool->front_len, gfp_mask); - if (!p) - pr_err("msgpool %s alloc failed\n", pool->name); - return p; + msg = ceph_msg_new(0, pool->front_len, gfp_mask); + if (!msg) { + dout("msgpool_alloc %s failed\n", pool->name); + } else { + dout("msgpool_alloc %s %p\n", pool->name, msg); + msg->pool = pool; + } + return msg; } -static void free_fn(void *element, void *arg) +static void msgpool_free(void *element, void *arg) { - ceph_msg_put(element); + struct ceph_msgpool *pool = arg; + struct ceph_msg *msg = element; + + dout("msgpool_release %s %p\n", pool->name, msg); + msg->pool = NULL; + ceph_msg_put(msg); } int ceph_msgpool_init(struct ceph_msgpool *pool, int front_len, int size, bool blocking, const char *name) { + dout("msgpool %s init\n", name); pool->front_len = front_len; - pool->pool = mempool_create(size, alloc_fn, free_fn, pool); + pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool); if (!pool->pool) return -ENOMEM; pool->name = name; @@ -36,14 +46,17 @@ int ceph_msgpool_init(struct ceph_msgpool *pool, void ceph_msgpool_destroy(struct ceph_msgpool *pool) { + dout("msgpool %s destroy\n", pool->name); mempool_destroy(pool->pool); } struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len) { + struct ceph_msg *msg; + if (front_len > pool->front_len) { - pr_err("msgpool_get pool %s need front %d, pool size is %d\n", + dout("msgpool_get %s need front %d, pool size is %d\n", pool->name, front_len, pool->front_len); WARN_ON(1); @@ -51,14 +64,19 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, return ceph_msg_new(0, front_len, GFP_NOFS); } - return mempool_alloc(pool->pool, GFP_NOFS); + msg = mempool_alloc(pool->pool, GFP_NOFS); + dout("msgpool_get %s %p\n", pool->name, msg); + return msg; } void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) { + dout("msgpool_put %s %p\n", pool->name, msg); + /* reset msg front_len; user may have changed it */ msg->front.iov_len = pool->front_len; msg->hdr.front_len = cpu_to_le32(pool->front_len); kref_init(&msg->kref); /* retake single ref */ + mempool_free(msg, pool->pool); } -- cgit v1.1 From 4f6fdf08681cecd9f38499de7a02eb4f05f399a7 Mon Sep 17 00:00:00 2001 From: Chase Douglas Date: Fri, 5 Aug 2011 09:16:57 -0700 Subject: HID: magicmouse: Set resolution of touch surfaces Add touch surface resolution information. The size of the touch surfaces has been determined to the hundredth of a mm. Cc: Jiri Kosina Cc: Michael Poole Cc: linux-input@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Chase Douglas [jkosina@suse.cz: update comments and commit message] Signed-off-by: Jiri Kosina --- drivers/hid/hid-magicmouse.c | 56 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index 0ec91c1..b5bdab3 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -81,6 +81,28 @@ MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state fie #define NO_TOUCHES -1 #define SINGLE_TOUCH_UP -2 +/* Touch surface information. Dimension is in hundredths of a mm, min and max + * are in units. */ +#define MOUSE_DIMENSION_X (float)9056 +#define MOUSE_MIN_X -1100 +#define MOUSE_MAX_X 1258 +#define MOUSE_RES_X ((MOUSE_MAX_X - MOUSE_MIN_X) / (MOUSE_DIMENSION_X / 100)) +#define MOUSE_DIMENSION_Y (float)5152 +#define MOUSE_MIN_Y -1589 +#define MOUSE_MAX_Y 2047 +#define MOUSE_RES_Y ((MOUSE_MAX_Y - MOUSE_MIN_Y) / (MOUSE_DIMENSION_Y / 100)) + +#define TRACKPAD_DIMENSION_X (float)13000 +#define TRACKPAD_MIN_X -2909 +#define TRACKPAD_MAX_X 3167 +#define TRACKPAD_RES_X \ + ((TRACKPAD_MAX_X - TRACKPAD_MIN_X) / (TRACKPAD_DIMENSION_X / 100)) +#define TRACKPAD_DIMENSION_Y (float)11000 +#define TRACKPAD_MIN_Y -2456 +#define TRACKPAD_MAX_Y 2565 +#define TRACKPAD_RES_Y \ + ((TRACKPAD_MAX_Y - TRACKPAD_MIN_Y) / (TRACKPAD_DIMENSION_Y / 100)) + /** * struct magicmouse_sc - Tracks Magic Mouse-specific data. * @input: Input device through which we report events. @@ -406,17 +428,31 @@ static void magicmouse_setup_input(struct input_dev *input, struct hid_device *h * inverse of the reported Y. */ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) { - input_set_abs_params(input, ABS_MT_POSITION_X, -1100, - 1358, 4, 0); - input_set_abs_params(input, ABS_MT_POSITION_Y, -1589, - 2047, 4, 0); + input_set_abs_params(input, ABS_MT_POSITION_X, + MOUSE_MIN_X, MOUSE_MAX_X, 4, 0); + input_set_abs_params(input, ABS_MT_POSITION_Y, + MOUSE_MIN_Y, MOUSE_MAX_Y, 4, 0); + + input_abs_set_res(input, ABS_MT_POSITION_X, + MOUSE_RES_X); + input_abs_set_res(input, ABS_MT_POSITION_Y, + MOUSE_RES_Y); } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */ - input_set_abs_params(input, ABS_X, -2909, 3167, 4, 0); - input_set_abs_params(input, ABS_Y, -2456, 2565, 4, 0); - input_set_abs_params(input, ABS_MT_POSITION_X, -2909, - 3167, 4, 0); - input_set_abs_params(input, ABS_MT_POSITION_Y, -2456, - 2565, 4, 0); + input_set_abs_params(input, ABS_X, TRACKPAD_MIN_X, + TRACKPAD_MAX_X, 4, 0); + input_set_abs_params(input, ABS_Y, TRACKPAD_MIN_Y, + TRACKPAD_MAX_Y, 4, 0); + input_set_abs_params(input, ABS_MT_POSITION_X, + TRACKPAD_MIN_X, TRACKPAD_MAX_X, 4, 0); + input_set_abs_params(input, ABS_MT_POSITION_Y, + TRACKPAD_MIN_Y, TRACKPAD_MAX_Y, 4, 0); + + input_abs_set_res(input, ABS_X, TRACKPAD_RES_X); + input_abs_set_res(input, ABS_Y, TRACKPAD_RES_Y); + input_abs_set_res(input, ABS_MT_POSITION_X, + TRACKPAD_RES_X); + input_abs_set_res(input, ABS_MT_POSITION_Y, + TRACKPAD_RES_Y); } input_set_events_per_packet(input, 60); -- cgit v1.1 From 971c90bfa2f0b4fe52d6d9002178d547706f1343 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 4 Aug 2011 07:25:35 -0700 Subject: alarmtimers: Avoid possible null pointer traversal We don't check if old_setting is non null before assigning it, so correct this. CC: Thomas Gleixner CC: stable@kernel.org Signed-off-by: John Stultz --- kernel/time/alarmtimer.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 59f369f..1dee3f6 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -479,11 +479,8 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, if (!rtcdev) return -ENOTSUPP; - /* Save old values */ - old_setting->it_interval = - ktime_to_timespec(timr->it.alarmtimer.period); - old_setting->it_value = - ktime_to_timespec(timr->it.alarmtimer.node.expires); + if (old_setting) + alarm_timer_get(timr, old_setting); /* If the timer was already set, cancel it */ alarm_cancel(&timr->it.alarmtimer); -- cgit v1.1 From ea7802f630d356acaf66b3c0b28c00a945fc35dc Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 4 Aug 2011 07:51:56 -0700 Subject: alarmtimers: Memset itimerspec passed into alarm_timer_get Following common_timer_get, zero out the itimerspec passed in. CC: Thomas Gleixner CC: stable@kernel.org Signed-off-by: John Stultz --- kernel/time/alarmtimer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 1dee3f6..0e9263f 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -441,6 +441,8 @@ static int alarm_timer_create(struct k_itimer *new_timer) static void alarm_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) { + memset(cur_setting, 0, sizeof(struct itimerspec)); + cur_setting->it_interval = ktime_to_timespec(timr->it.alarmtimer.period); cur_setting->it_value = -- cgit v1.1 From 6af7e471e5a7746b8024d70b4363d3dfe41d36b8 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 10 Aug 2011 10:26:09 -0700 Subject: alarmtimers: Avoid possible denial of service with high freq periodic timers Its possible to jam up the alarm timers by setting very small interval timers, which will cause the alarmtimer subsystem to spend all of its time firing and restarting timers. This can effectivly lock up a box. A deeper fix is needed, closely mimicking the hrtimer code, but for now just cap the interval to 100us to avoid userland hanging the system. CC: Thomas Gleixner CC: stable@kernel.org Signed-off-by: John Stultz --- kernel/time/alarmtimer.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 0e9263f..ea5e1a9 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -481,6 +481,15 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, if (!rtcdev) return -ENOTSUPP; + /* + * XXX HACK! Currently we can DOS a system if the interval + * period on alarmtimers is too small. Cap the interval here + * to 100us and solve this properly in a future patch! -jstultz + */ + if ((new_setting->it_interval.tv_sec == 0) && + (new_setting->it_interval.tv_nsec < 100000)) + new_setting->it_interval.tv_nsec = 100000; + if (old_setting) alarm_timer_get(timr, old_setting); -- cgit v1.1 From 30eefc95841ce51c3281876f0b954dd1d3c0bd5f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 10 Aug 2011 11:22:42 -0700 Subject: xen: xen-selfballoon.c needs more header files Fix build errors (found when CONFIG_SYSFS is not enabled): drivers/xen/xen-selfballoon.c:446: warning: data definition has no type or storage class drivers/xen/xen-selfballoon.c:446: warning: type defaults to 'int' in declaration of 'EXPORT_SYMBOL' drivers/xen/xen-selfballoon.c:446: warning: parameter names (without types) in function declaration drivers/xen/xen-selfballoon.c:485: error: expected declaration specifiers or '...' before string constant drivers/xen/xen-selfballoon.c:485: warning: data definition has no type or storage class drivers/xen/xen-selfballoon.c:485: warning: type defaults to 'int' in declaration of 'MODULE_LICENSE' drivers/xen/xen-selfballoon.c:485: warning: function declaration isn't a prototype Signed-off-by: Randy Dunlap Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-selfballoon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 1b4afd8..6ea852e 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include #include -- cgit v1.1 From bf6ed027bcc93f8d54d321fe87f0434b25699eb1 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 10 Aug 2011 21:11:26 +0800 Subject: rtc: ep93xx: Fix 'rtc' may be used uninitialized warning commit 92d921c5d "rtc: ep93xx: Initialize drvdata before registering device" ensures the drvdata is initialized prior to registering the rtc device. But it set the drvdata to an uninitialized pointer. Thus calling platform_get_drvdata in ep93xx_rtc_remove does not get correct address. This patch fixes below warning by adding struct rtc_device *rtc to struct ep93xx_rtc. Then set platform drvdata to ep93xx_rtc instead of rtc. CC drivers/rtc/rtc-ep93xx.o drivers/rtc/rtc-ep93xx.c: In function 'ep93xx_rtc_probe': drivers/rtc/rtc-ep93xx.c:154: warning: 'rtc' may be used uninitialized in this function Signed-off-by: Axel Lin Signed-off-by: John Stultz --- drivers/rtc/rtc-ep93xx.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c index 335551d..14a42a1 100644 --- a/drivers/rtc/rtc-ep93xx.c +++ b/drivers/rtc/rtc-ep93xx.c @@ -36,6 +36,7 @@ */ struct ep93xx_rtc { void __iomem *mmio_base; + struct rtc_device *rtc; }; static int ep93xx_rtc_get_swcomp(struct device *dev, unsigned short *preload, @@ -130,7 +131,6 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev) { struct ep93xx_rtc *ep93xx_rtc; struct resource *res; - struct rtc_device *rtc; int err; ep93xx_rtc = devm_kzalloc(&pdev->dev, sizeof(*ep93xx_rtc), GFP_KERNEL); @@ -151,12 +151,12 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev) return -ENXIO; pdev->dev.platform_data = ep93xx_rtc; - platform_set_drvdata(pdev, rtc); + platform_set_drvdata(pdev, ep93xx_rtc); - rtc = rtc_device_register(pdev->name, + ep93xx_rtc->rtc = rtc_device_register(pdev->name, &pdev->dev, &ep93xx_rtc_ops, THIS_MODULE); - if (IS_ERR(rtc)) { - err = PTR_ERR(rtc); + if (IS_ERR(ep93xx_rtc->rtc)) { + err = PTR_ERR(ep93xx_rtc->rtc); goto exit; } @@ -167,7 +167,7 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev) return 0; fail: - rtc_device_unregister(rtc); + rtc_device_unregister(ep93xx_rtc->rtc); exit: platform_set_drvdata(pdev, NULL); pdev->dev.platform_data = NULL; @@ -176,11 +176,11 @@ exit: static int __exit ep93xx_rtc_remove(struct platform_device *pdev) { - struct rtc_device *rtc = platform_get_drvdata(pdev); + struct ep93xx_rtc *ep93xx_rtc = platform_get_drvdata(pdev); sysfs_remove_group(&pdev->dev.kobj, &ep93xx_rtc_sysfs_files); platform_set_drvdata(pdev, NULL); - rtc_device_unregister(rtc); + rtc_device_unregister(ep93xx_rtc->rtc); pdev->dev.platform_data = NULL; return 0; -- cgit v1.1 From dec35d19c4ec65b94df3b27b6e373f0d48c9cd32 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Wed, 16 Mar 2011 06:07:14 +0000 Subject: rtc: rtc-twl: Switch to using threaded irq The driver is accessing to i2c bus in interrupt handler. Therefore, it should use threaded irq. Signed-off-by: Ilkka Koskinen Acked-by: Balaji T K Signed-off-by: John Stultz --- drivers/rtc/rtc-twl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c index 9a81f77..1963cdd 100644 --- a/drivers/rtc/rtc-twl.c +++ b/drivers/rtc/rtc-twl.c @@ -462,7 +462,7 @@ static int __devinit twl_rtc_probe(struct platform_device *pdev) if (ret < 0) goto out1; - ret = request_irq(irq, twl_rtc_interrupt, + ret = request_threaded_irq(irq, NULL, twl_rtc_interrupt, IRQF_TRIGGER_RISING, dev_name(&rtc->dev), rtc); if (ret < 0) { -- cgit v1.1 From 34d623d11316cb69f9e8cc5eb50d3792b5c302b6 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 31 May 2011 08:51:39 +0000 Subject: rtc: rtc-twl: Remove lockdep related local_irq_enable() Now that the irq is properly threaded (due to it needing i2c access) we should also remove the local_irq_enable() call in twl_rtc_interrupt. Testing this with Pandaboard, the RTC is still working. [Reworked commit message -jstultz] Signed-off-by: John Stultz --- drivers/rtc/rtc-twl.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c index 1963cdd..9677bbc 100644 --- a/drivers/rtc/rtc-twl.c +++ b/drivers/rtc/rtc-twl.c @@ -362,14 +362,6 @@ static irqreturn_t twl_rtc_interrupt(int irq, void *rtc) int res; u8 rd_reg; -#ifdef CONFIG_LOCKDEP - /* WORKAROUND for lockdep forcing IRQF_DISABLED on us, which - * we don't want and can't tolerate. Although it might be - * friendlier not to borrow this thread context... - */ - local_irq_enable(); -#endif - res = twl_rtc_read_u8(&rd_reg, REG_RTC_STATUS_REG); if (res) goto out; -- cgit v1.1 From 938f97bcf1bdd1b681d5d14d1d7117a2e22d4434 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 22 Jul 2011 09:12:51 +0000 Subject: rtc: Fix RTC PIE frequency limit Thomas earlier submitted a fix to limit the RTC PIE freq, but picked 5000Hz out of the air. Willy noticed that we should instead use the 8192Hz max from the rtc man documentation. Cc: Willy Tarreau Cc: stable@kernel.org Cc: Thomas Gleixner Signed-off-by: John Stultz --- drivers/rtc/interface.c | 2 +- include/linux/rtc.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 3195dbd..eb4c883 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -708,7 +708,7 @@ int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq) int err = 0; unsigned long flags; - if (freq <= 0 || freq > 5000) + if (freq <= 0 || freq > RTC_MAX_FREQ) return -EINVAL; retry: spin_lock_irqsave(&rtc->irq_task_lock, flags); diff --git a/include/linux/rtc.h b/include/linux/rtc.h index b27ebea..93f4d03 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -97,6 +97,9 @@ struct rtc_pll_info { #define RTC_AF 0x20 /* Alarm interrupt */ #define RTC_UF 0x10 /* Update interrupt for 1Hz RTC */ + +#define RTC_MAX_FREQ 8192 + #ifdef __KERNEL__ #include -- cgit v1.1 From 8e4bf84474960e832b56293c9b0674c88b5b05ce Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 11 Aug 2011 10:36:03 +0200 Subject: Move some REQ flags to the common bio/request area REQ_SECURE, REQ_FLUSH and REQ_FUA may all be set on a bio as well as on a request, so relocate them to the shared part of the enum. Signed-off-by: Matthew Wilcox Signed-off-by: Namhyung Kim Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6395692..32f0076 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -125,7 +125,11 @@ enum rq_flag_bits { __REQ_SYNC, /* request is sync (sync write or read) */ __REQ_META, /* metadata io request */ __REQ_DISCARD, /* request to discard sectors */ + __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ + __REQ_NOIDLE, /* don't anticipate more IO after this one */ + __REQ_FUA, /* forced unit access */ + __REQ_FLUSH, /* request for cache flush */ /* bio only flags */ __REQ_RAHEAD, /* read ahead, can fail anytime */ @@ -135,7 +139,6 @@ enum rq_flag_bits { /* request only flags */ __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ - __REQ_FUA, /* forced unit access */ __REQ_NOMERGE, /* don't touch this for merging */ __REQ_STARTED, /* drive already may have started this one */ __REQ_DONTPREP, /* don't call prep for this one */ @@ -146,11 +149,9 @@ enum rq_flag_bits { __REQ_PREEMPT, /* set for "ide_preempt" requests */ __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_COPY_USER, /* contains copies of user pages */ - __REQ_FLUSH, /* request for cache flush */ __REQ_FLUSH_SEQ, /* request for flush sequence */ __REQ_IO_STAT, /* account I/O stat */ __REQ_MIXED_MERGE, /* merge of different types, fail separately */ - __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ __REQ_NR_BITS, /* stops here */ }; -- cgit v1.1 From c09c47caedc9854d59378d6e34c989e51cfdd2b4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 11 Aug 2011 10:36:05 +0200 Subject: blktrace: add FLUSH/FUA support Add FLUSH/FUA support to blktrace. As FLUSH precedes WRITE and/or FUA follows WRITE, use the same 'F' flag for both cases and distinguish them by their (relative) position. The end results look like (other flags might be shown also): - WRITE: W - WRITE_FLUSH: FW - WRITE_FUA: WF - WRITE_FLUSH_FUA: FWF Note that we reuse TC_BARRIER due to lack of bit space of act_mask so that the older versions of blktrace tools will report flush requests as barriers from now on. Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Namhyung Kim Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 5 +++-- include/trace/events/block.h | 20 +++++++++++--------- kernel/trace/blktrace.c | 21 ++++++++++++++++----- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 8c7c2de..8e9e4bc 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -14,7 +14,7 @@ enum blktrace_cat { BLK_TC_READ = 1 << 0, /* reads */ BLK_TC_WRITE = 1 << 1, /* writes */ - BLK_TC_BARRIER = 1 << 2, /* barrier */ + BLK_TC_FLUSH = 1 << 2, /* flush */ BLK_TC_SYNC = 1 << 3, /* sync IO */ BLK_TC_SYNCIO = BLK_TC_SYNC, BLK_TC_QUEUE = 1 << 4, /* queueing/merging */ @@ -28,8 +28,9 @@ enum blktrace_cat { BLK_TC_META = 1 << 12, /* metadata */ BLK_TC_DISCARD = 1 << 13, /* discard requests */ BLK_TC_DRV_DATA = 1 << 14, /* binary per-driver data */ + BLK_TC_FUA = 1 << 15, /* fua requests */ - BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ + BLK_TC_END = 1 << 15, /* we've run out of bits! */ }; #define BLK_TC_SHIFT (16) diff --git a/include/trace/events/block.h b/include/trace/events/block.h index bf36654..05c5e61 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -8,6 +8,8 @@ #include #include +#define RWBS_LEN 8 + DECLARE_EVENT_CLASS(block_rq_with_error, TP_PROTO(struct request_queue *q, struct request *rq), @@ -19,7 +21,7 @@ DECLARE_EVENT_CLASS(block_rq_with_error, __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( int, errors ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN ) __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) ), @@ -104,7 +106,7 @@ DECLARE_EVENT_CLASS(block_rq, __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( unsigned int, bytes ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) ), @@ -183,7 +185,7 @@ TRACE_EVENT(block_bio_bounce, __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), @@ -222,7 +224,7 @@ TRACE_EVENT(block_bio_complete, __field( sector_t, sector ) __field( unsigned, nr_sector ) __field( int, error ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( @@ -249,7 +251,7 @@ DECLARE_EVENT_CLASS(block_bio, __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), @@ -321,7 +323,7 @@ DECLARE_EVENT_CLASS(block_get_rq, __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), @@ -456,7 +458,7 @@ TRACE_EVENT(block_split, __field( dev_t, dev ) __field( sector_t, sector ) __field( sector_t, new_sector ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), @@ -498,7 +500,7 @@ TRACE_EVENT(block_bio_remap, __field( unsigned int, nr_sector ) __field( dev_t, old_dev ) __field( sector_t, old_sector ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( @@ -542,7 +544,7 @@ TRACE_EVENT(block_rq_remap, __field( unsigned int, nr_sector ) __field( dev_t, old_dev ) __field( sector_t, old_sector ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 6957aa2..7c910a5 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -206,6 +206,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, what |= MASK_TC_BIT(rw, RAHEAD); what |= MASK_TC_BIT(rw, META); what |= MASK_TC_BIT(rw, DISCARD); + what |= MASK_TC_BIT(rw, FLUSH); + what |= MASK_TC_BIT(rw, FUA); pid = tsk->pid; if (act_log_check(bt, what, sector, pid)) @@ -1054,6 +1056,9 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) goto out; } + if (tc & BLK_TC_FLUSH) + rwbs[i++] = 'F'; + if (tc & BLK_TC_DISCARD) rwbs[i++] = 'D'; else if (tc & BLK_TC_WRITE) @@ -1063,10 +1068,10 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) else rwbs[i++] = 'N'; + if (tc & BLK_TC_FUA) + rwbs[i++] = 'F'; if (tc & BLK_TC_AHEAD) rwbs[i++] = 'A'; - if (tc & BLK_TC_BARRIER) - rwbs[i++] = 'B'; if (tc & BLK_TC_SYNC) rwbs[i++] = 'S'; if (tc & BLK_TC_META) @@ -1132,7 +1137,7 @@ typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); static int blk_log_action_classic(struct trace_iterator *iter, const char *act) { - char rwbs[6]; + char rwbs[RWBS_LEN]; unsigned long long ts = iter->ts; unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); unsigned secs = (unsigned long)ts; @@ -1148,7 +1153,7 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act) static int blk_log_action(struct trace_iterator *iter, const char *act) { - char rwbs[6]; + char rwbs[RWBS_LEN]; const struct blk_io_trace *t = te_blk_io_trace(iter->ent); fill_rwbs(rwbs, t); @@ -1561,7 +1566,7 @@ static const struct { } mask_maps[] = { { BLK_TC_READ, "read" }, { BLK_TC_WRITE, "write" }, - { BLK_TC_BARRIER, "barrier" }, + { BLK_TC_FLUSH, "flush" }, { BLK_TC_SYNC, "sync" }, { BLK_TC_QUEUE, "queue" }, { BLK_TC_REQUEUE, "requeue" }, @@ -1573,6 +1578,7 @@ static const struct { { BLK_TC_META, "meta" }, { BLK_TC_DISCARD, "discard" }, { BLK_TC_DRV_DATA, "drv_data" }, + { BLK_TC_FUA, "fua" }, }; static int blk_trace_str2mask(const char *str) @@ -1788,6 +1794,9 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) { int i = 0; + if (rw & REQ_FLUSH) + rwbs[i++] = 'F'; + if (rw & WRITE) rwbs[i++] = 'W'; else if (rw & REQ_DISCARD) @@ -1797,6 +1806,8 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) else rwbs[i++] = 'N'; + if (rw & REQ_FUA) + rwbs[i++] = 'F'; if (rw & REQ_RAHEAD) rwbs[i++] = 'A'; if (rw & REQ_SYNC) -- cgit v1.1 From bcf30e75b773b60379338768677a1301ef602ff9 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 11 Aug 2011 10:39:04 +0200 Subject: block: improve rq_affinity placement This patch reverts commit 35ae66e0a09ab70ed(block: Make rq_affinity = 1 work as expected). The purpose is to avoid an unnecessary IPI. Let's take an example. My test box has cpu 0-7, one socket. Say request is added from CPU 1, blk_complete_request() occurs at CPU 7. Without the reverted patch, softirq will be done at CPU 7. With it, an IPI will be directed to CPU 0, and softirq will be done at CPU 0. In this case, doing softirq at CPU 0 and CPU 7 have no difference from cache sharing point view and we can avoid an ipi if doing it in CPU 7. An immediate concern is this is just like QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is running in interrupt handler, and currently I/O controller doesn't support multiple interrupts (I checked several LSI cards and AHCI), so only one CPU can run blk_complete_request(). This is still quite different as QUEUE_FLAG_SAME_FORCE. Since only one CPU runs softirq, the only difference with below patch is softirq not always runs at the first CPU of a group. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-softirq.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 487addc..58340d0 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -103,7 +103,7 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = { void __blk_complete_request(struct request *req) { - int ccpu, cpu; + int ccpu, cpu, group_cpu = NR_CPUS; struct request_queue *q = req->q; unsigned long flags; @@ -117,12 +117,22 @@ void __blk_complete_request(struct request *req) */ if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) { ccpu = req->cpu; - if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) + if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) { ccpu = blk_cpu_to_group(ccpu); + group_cpu = blk_cpu_to_group(cpu); + } } else ccpu = cpu; - if (ccpu == cpu) { + /* + * If current CPU and requested CPU are in the same group, running + * softirq in current CPU. One might concern this is just like + * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is + * running in interrupt handler, and currently I/O controller doesn't + * support multiple interrupts, so current CPU is unique actually. This + * avoids IPI sending from current CPU to the first CPU of a group. + */ + if (ccpu == cpu || ccpu == group_cpu) { struct list_head *list; do_local: list = &__get_cpu_var(blk_cpu_done); -- cgit v1.1 From b3b46d76d0fcbb1f737107cec1a1ee87bc5e5fd3 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Thu, 11 Aug 2011 12:06:28 -0400 Subject: APEI: Fix WHEA _OSC call Bit 0 of the support parameter to the OSC call should be set in order to indicate that the OS supports the WHEA mechanism. Stuart Hayes tracked an APEI issue on some Dell platforms down to this. Reported-by: Stuart Hayes Signed-off-by: Matthew Garrett Signed-off-by: Len Brown --- drivers/acpi/apei/apei-base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 8041248..6154036 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -618,7 +618,7 @@ int apei_osc_setup(void) }; capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; - capbuf[OSC_SUPPORT_TYPE] = 0; + capbuf[OSC_SUPPORT_TYPE] = 1; capbuf[OSC_CONTROL_TYPE] = 0; if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)) -- cgit v1.1 From d9b830fa444c1f4955d0ee88f5af2aa24d2c7837 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 11 Aug 2011 09:19:29 -0700 Subject: Input: mpu3050 - correct call to input_free_device input_free_device() should be used if input_register_device() was not called yet or if it failed. Signed-off-by: Axel Lin Signed-off-by: Dmitry Torokhov --- drivers/input/misc/mpu3050.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/mpu3050.c b/drivers/input/misc/mpu3050.c index b95fac1..f71dc72 100644 --- a/drivers/input/misc/mpu3050.c +++ b/drivers/input/misc/mpu3050.c @@ -282,7 +282,7 @@ err_free_irq: err_pm_set_suspended: pm_runtime_set_suspended(&client->dev); err_free_mem: - input_unregister_device(idev); + input_free_device(idev); kfree(sensor); return error; } -- cgit v1.1 From 22f83205e59c97c2460ad8e4bd6e71268cb2f37f Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 11 Aug 2011 09:22:45 -0700 Subject: Input: tegra-kbc - correct call to input_free_device If kzalloc for kbc fails, then we have NULL pointer dereference while calling input_free_device(kbc->idev) in the error handling. So it is safer to always use the original name, input_dev. Signed-off-by: Axel Lin Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/tegra-kbc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c index f270447..a5a7791 100644 --- a/drivers/input/keyboard/tegra-kbc.c +++ b/drivers/input/keyboard/tegra-kbc.c @@ -702,7 +702,7 @@ err_iounmap: err_free_mem_region: release_mem_region(res->start, resource_size(res)); err_free_mem: - input_free_device(kbc->idev); + input_free_device(input_dev); kfree(kbc); return err; -- cgit v1.1 From 4b1bfb7d2d125af6653d6c2305356b2677f79dc6 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 10 Aug 2011 15:32:22 +0200 Subject: rt2x00: fix crash in rt2800usb_write_tx_desc Patch should fix this oops: BUG: unable to handle kernel NULL pointer dereference at 000000a0 IP: [] rt2800usb_write_tx_desc+0x18/0xc0 [rt2800usb] *pdpt = 000000002408c001 *pde = 0000000024079067 *pte = 0000000000000000 Oops: 0000 [#1] SMP EIP: 0060:[] EFLAGS: 00010282 CPU: 0 EIP is at rt2800usb_write_tx_desc+0x18/0xc0 [rt2800usb] EAX: 00000035 EBX: ef2bef10 ECX: 00000000 EDX: d40958a0 ESI: ef1865f8 EDI: ef1865f8 EBP: d4095878 ESP: d409585c DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Call Trace: [] rt2x00queue_write_tx_frame+0x155/0x300 [rt2x00lib] [] rt2x00mac_tx+0x7c/0x370 [rt2x00lib] [] ? mark_held_locks+0x62/0x90 [] ? _raw_spin_unlock_irqrestore+0x35/0x60 [] ? trace_hardirqs_on_caller+0x5a/0x170 [] ? trace_hardirqs_on+0xb/0x10 [] __ieee80211_tx+0x5c/0x1e0 [mac80211] [] ieee80211_tx+0xbc/0xe0 [mac80211] [] ? ieee80211_tx+0x23/0xe0 [mac80211] [] ieee80211_xmit+0xc1/0x200 [mac80211] [] ? ieee80211_tx+0xe0/0xe0 [mac80211] [] ? lock_release_holdtime+0x35/0x1b0 [] ? ieee80211_subif_start_xmit+0x446/0x5f0 [mac80211] [] ieee80211_subif_start_xmit+0x29d/0x5f0 [mac80211] [] ? ieee80211_subif_start_xmit+0x3e4/0x5f0 [mac80211] [] ? sock_setsockopt+0x6a8/0x6f0 [] ? sock_setsockopt+0x520/0x6f0 [] dev_hard_start_xmit+0x2ef/0x650 Oops might happen because we perform parallel putting new entries in a queue (rt2x00queue_write_tx_frame()) and removing entries after finishing transmitting (rt2800usb_work_txdone()). There are cases when _txdone may process an entry that was not fully send and nullify entry->skb . To fix check in _txdone if entry has flags that indicate pending transmission and wait until flags get cleared. Reported-by: Justin Piszcz Cc: stable@kernel.org Signed-off-by: Stanislaw Gruszka Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2800usb.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c index 9395631..2cb25ea 100644 --- a/drivers/net/wireless/rt2x00/rt2800usb.c +++ b/drivers/net/wireless/rt2x00/rt2800usb.c @@ -464,6 +464,15 @@ static bool rt2800usb_txdone_entry_check(struct queue_entry *entry, u32 reg) int wcid, ack, pid; int tx_wcid, tx_ack, tx_pid; + if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) || + !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags)) { + WARNING(entry->queue->rt2x00dev, + "Data pending for entry %u in queue %u\n", + entry->entry_idx, entry->queue->qid); + cond_resched(); + return false; + } + wcid = rt2x00_get_field32(reg, TX_STA_FIFO_WCID); ack = rt2x00_get_field32(reg, TX_STA_FIFO_TX_ACK_REQUIRED); pid = rt2x00_get_field32(reg, TX_STA_FIFO_PID_TYPE); @@ -558,8 +567,10 @@ static void rt2800usb_work_txdone(struct work_struct *work) while (!rt2x00queue_empty(queue)) { entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE); - if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags)) + if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) || + !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags)) break; + if (test_bit(ENTRY_DATA_IO_FAILED, &entry->flags)) rt2x00lib_txdone_noinfo(entry, TXDONE_FAILURE); else if (rt2x00queue_status_timeout(entry)) -- cgit v1.1 From df71c9cfceea801e7e26e2c74241758ef9c042e5 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 10 Aug 2011 15:32:23 +0200 Subject: rt2x00: fix order of entry flags modification In rt2800usb_work_txdone we check flags in order: - ENTRY_OWNER_DEVICE_DATA - ENTRY_DATA_STATUS_PENDING - ENTRY_DATA_IO_FAILED Modify flags in separate order in rt2x00usb_interrupt_txdone, to avoid processing entries in _txdone with wrong flags or skip processing ready entries. Reported-by: Justin Piszcz Cc: stable@kernel.org Signed-off-by: Stanislaw Gruszka Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2x00usb.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c index b6b4542..7fbb55c 100644 --- a/drivers/net/wireless/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/rt2x00/rt2x00usb.c @@ -262,23 +262,20 @@ static void rt2x00usb_interrupt_txdone(struct urb *urb) struct queue_entry *entry = (struct queue_entry *)urb->context; struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev; - if (!test_and_clear_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags)) + if (!test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags)) return; - - if (rt2x00dev->ops->lib->tx_dma_done) - rt2x00dev->ops->lib->tx_dma_done(entry); - - /* - * Report the frame as DMA done - */ - rt2x00lib_dmadone(entry); - /* * Check if the frame was correctly uploaded */ if (urb->status) set_bit(ENTRY_DATA_IO_FAILED, &entry->flags); + /* + * Report the frame as DMA done + */ + rt2x00lib_dmadone(entry); + if (rt2x00dev->ops->lib->tx_dma_done) + rt2x00dev->ops->lib->tx_dma_done(entry); /* * Schedule the delayed work for reading the TX status * from the device. -- cgit v1.1 From 674db1344443204b6ce3293f2df8fd1b7665deea Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 10 Aug 2011 15:32:24 +0200 Subject: rt2x00: fix crash in rt2800usb_get_txwi Patch should fix this oops: BUG: unable to handle kernel NULL pointer dereference at 000000a0 IP: [] rt2800usb_get_txwi+0x19/0x70 [rt2800usb] *pdpt = 0000000000000000 *pde = f000ff53f000ff53 Oops: 0000 [#1] SMP Pid: 198, comm: kworker/u:3 Tainted: G W 3.0.0-wl+ #9 LENOVO 6369CTO/6369CTO EIP: 0060:[] EFLAGS: 00010283 CPU: 1 EIP is at rt2800usb_get_txwi+0x19/0x70 [rt2800usb] EAX: 00000000 EBX: f465e140 ECX: f4494960 EDX: ef24c5f8 ESI: 810f21f5 EDI: f1da9960 EBP: f4581e80 ESP: f4581e70 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process kworker/u:3 (pid: 198, ti=f4580000 task=f4494960 task.ti=f4580000) Call Trace: [] rt2800_txdone_entry+0x2f/0xf0 [rt2800lib] [] ? warn_slowpath_common+0x7d/0xa0 [] ? rt2800usb_work_txdone+0x288/0x360 [rt2800usb] [] ? rt2800usb_work_txdone+0x288/0x360 [rt2800usb] [] rt2800usb_work_txdone+0x263/0x360 [rt2800usb] [] process_one_work+0x186/0x440 [] ? process_one_work+0x10a/0x440 [] ? rt2800usb_probe_hw+0x120/0x120 [rt2800usb] [] worker_thread+0x133/0x310 [] ? trace_hardirqs_on+0xb/0x10 [] ? manage_workers+0x1e0/0x1e0 [] kthread+0x7c/0x90 [] ? __init_kthread_worker+0x60/0x60 [] kernel_thread_helper+0x6/0x1 Oops might happen because we check rt2x00queue_empty(queue) twice, but this condition can change and we can process entry in rt2800_txdone_entry(), which was already processed by rt2800usb_txdone_entry_check() -> rt2x00lib_txdone_noinfo() and has nullify entry->skb . Reported-by: Justin Piszcz Cc: stable@kernel.org Signed-off-by: Stanislaw Gruszka Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2800usb.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c index 2cb25ea..dbf501c 100644 --- a/drivers/net/wireless/rt2x00/rt2800usb.c +++ b/drivers/net/wireless/rt2x00/rt2800usb.c @@ -538,12 +538,11 @@ static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev) entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE); if (rt2800usb_txdone_entry_check(entry, reg)) break; + entry = NULL; } - if (!entry || rt2x00queue_empty(queue)) - break; - - rt2800_txdone_entry(entry, reg); + if (entry) + rt2800_txdone_entry(entry, reg); } } -- cgit v1.1 From 96242116d483cd98ab55fb989ca096f6f9cc3738 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 11 Aug 2011 12:14:05 -0600 Subject: PNP: update pnp.debug usage (needs value on command line) Commit cdefba03e44 changed pnp.debug from a boot param to a module param, which means it needs a value when used on the command line. CC: Thomas Renninger Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown --- Documentation/kernel-parameters.txt | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e279b72..130713f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2081,9 +2081,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Override pmtimer IOPort with a hex value. e.g. pmtmr=0x508 - pnp.debug [PNP] - Enable PNP debug messages. This depends on the - CONFIG_PNP_DEBUG_MESSAGES option. + pnp.debug=1 [PNP] + Enable PNP debug messages (depends on the + CONFIG_PNP_DEBUG_MESSAGES option). Change at run-time + via /sys/module/pnp/parameters/debug. We always show + current resource usage; turning this on also shows + possible settings and some assignment information. pnpacpi= [ACPI] { off } -- cgit v1.1 From 03ba176a29dae5b4849f45c0b5c89b9d78baa2c6 Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Wed, 10 Aug 2011 10:46:22 +0800 Subject: ACPI APEI: Add Kconfig option IRQ_WORK for GHES IRQ_WORK is used by GHES, but it is selected by PERF_EVENT. For now PERF_EVENT is selected by x86 by default, but in concept, IRQ_WORK should be selected by GHES, not by others. Signed-off-by: Chen Gong Signed-off-by: Len Brown --- drivers/acpi/apei/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index c34aa51..e3f4787 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -13,6 +13,7 @@ config ACPI_APEI_GHES bool "APEI Generic Hardware Error Source" depends on ACPI_APEI && X86 select ACPI_HED + select IRQ_WORK select LLIST select GENERIC_ALLOCATOR help -- cgit v1.1 From 8475e2336cf80ba6e7b27715b4b3214d73c211ab Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Aug 2011 17:22:45 +0300 Subject: Bluetooth: unlock if allocation fails in hci_blacklist_add() There was a small typo here so we never actually hit the goto which would call hci_dev_unlock_bh(). Signed-off-by: Dan Carpenter Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ec0bc3f..fca62dc 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1327,7 +1327,7 @@ int hci_blacklist_add(struct hci_dev *hdev, bdaddr_t *bdaddr) entry = kzalloc(sizeof(struct bdaddr_list), GFP_KERNEL); if (!entry) { - return -ENOMEM; + err = -ENOMEM; goto err; } -- cgit v1.1 From 4935f1c164ac528dff3538f97953b385ba500710 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Tue, 9 Aug 2011 17:16:28 +0200 Subject: Bluetooth: btusb: be quiet on device disconnect Disabling the bluetooth usb device embedded in (some) ThinkPads tends to lead to errors like these: btusb_bulk_complete: hci0 urb ffff88011b9bfd68 failed to resubmit (19) btusb_intr_complete: hci0 urb ffff88011b46a318 failed to resubmit (19) btusb_bulk_complete: hci0 urb ffff88011b46a000 failed to resubmit (19) That is because usb_disconnect() doesn't "quiesces" pending urbs. Disconnecting a device is a normal thing to happen so it's no big deal that usb_submit_urb() returns -ENODEV. The simplest way to get rid of these errors is to stop treating that return as an error. Trivial, actually. While we're at it, add comments to be explicit about the reasons we're not complaining about -EPERM and -ENODEV. Signed-off-by: Paul Bolle Signed-off-by: Gustavo F. Padovan --- drivers/bluetooth/btusb.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 91d13a9..9e4448e 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -256,7 +256,9 @@ static void btusb_intr_complete(struct urb *urb) err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { - if (err != -EPERM) + /* -EPERM: urb is being killed; + * -ENODEV: device got disconnected */ + if (err != -EPERM && err != -ENODEV) BT_ERR("%s urb %p failed to resubmit (%d)", hdev->name, urb, -err); usb_unanchor_urb(urb); @@ -341,7 +343,9 @@ static void btusb_bulk_complete(struct urb *urb) err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { - if (err != -EPERM) + /* -EPERM: urb is being killed; + * -ENODEV: device got disconnected */ + if (err != -EPERM && err != -ENODEV) BT_ERR("%s urb %p failed to resubmit (%d)", hdev->name, urb, -err); usb_unanchor_urb(urb); @@ -431,7 +435,9 @@ static void btusb_isoc_complete(struct urb *urb) err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { - if (err != -EPERM) + /* -EPERM: urb is being killed; + * -ENODEV: device got disconnected */ + if (err != -EPERM && err != -ENODEV) BT_ERR("%s urb %p failed to resubmit (%d)", hdev->name, urb, -err); usb_unanchor_urb(urb); -- cgit v1.1 From 8e7c3d2e4ba18ee4cdcc1f89aec944fbff4ce735 Mon Sep 17 00:00:00 2001 From: Ricardo Mendoza Date: Wed, 13 Jul 2011 16:04:29 +0100 Subject: Bluetooth: Add Toshiba laptops AR30XX device ID Blacklist Toshiba-branded AR3011 based AR5B195 [0930:0215] and add to ath3k.c for firmware loading. Signed-off-by: Ricardo Mendoza Signed-off-by: Gustavo F. Padovan --- drivers/bluetooth/ath3k.c | 1 + drivers/bluetooth/btusb.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index a585473..db7cb81 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -63,6 +63,7 @@ static struct usb_device_id ath3k_table[] = { /* Atheros AR3011 with sflash firmware*/ { USB_DEVICE(0x0CF3, 0x3002) }, { USB_DEVICE(0x13d3, 0x3304) }, + { USB_DEVICE(0x0930, 0x0215) }, /* Atheros AR9285 Malbec with sflash firmware */ { USB_DEVICE(0x03F0, 0x311D) }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 9e4448e..3ef4760 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -106,6 +106,7 @@ static struct usb_device_id blacklist_table[] = { /* Atheros 3011 with sflash firmware */ { USB_DEVICE(0x0cf3, 0x3002), .driver_info = BTUSB_IGNORE }, { USB_DEVICE(0x13d3, 0x3304), .driver_info = BTUSB_IGNORE }, + { USB_DEVICE(0x0930, 0x0215), .driver_info = BTUSB_IGNORE }, /* Atheros AR9285 Malbec with sflash firmware */ { USB_DEVICE(0x03f0, 0x311d), .driver_info = BTUSB_IGNORE }, -- cgit v1.1 From e5842cdb0f4f2c68f6acd39e286e5d10d8c073e8 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:10:35 -0400 Subject: Bluetooth: rfcomm: Remove unnecessary krfcommd event Removed superfluous event handling which was used to signal that the rfcomm kthread had been woken. This appears to have been used to prevent lost wakeups. Correctly ordering when the task state is set to TASK_INTERRUPTIBLE is sufficient to prevent lost wakeups. To prevent wakeups which occurred prior to initially setting TASK_INTERRUPTIBLE from being lost, the main work of the thread loop - rfcomm_process_sessions() - is performed prior to sleeping. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/rfcomm/core.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 5759bb7..5ba3f6d 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -62,7 +62,6 @@ static DEFINE_MUTEX(rfcomm_mutex); #define rfcomm_lock() mutex_lock(&rfcomm_mutex) #define rfcomm_unlock() mutex_unlock(&rfcomm_mutex) -static unsigned long rfcomm_event; static LIST_HEAD(session_list); @@ -120,7 +119,6 @@ static inline void rfcomm_schedule(void) { if (!rfcomm_thread) return; - set_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event); wake_up_process(rfcomm_thread); } @@ -2038,19 +2036,18 @@ static int rfcomm_run(void *unused) rfcomm_add_listener(BDADDR_ANY); - while (!kthread_should_stop()) { + while (1) { set_current_state(TASK_INTERRUPTIBLE); - if (!test_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event)) { - /* No pending events. Let's sleep. - * Incoming connections and data will wake us up. */ - schedule(); - } - set_current_state(TASK_RUNNING); + + if (kthread_should_stop()) + break; /* Process stuff */ - clear_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event); rfcomm_process_sessions(); + + schedule(); } + __set_current_state(TASK_RUNNING); rfcomm_kill_listener(); -- cgit v1.1 From 950e2d51e866623e4c360280aa63b85ab66d3403 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:10:41 -0400 Subject: Bluetooth: rfcomm: Fix lost wakeups waiting to accept socket Fix race conditions which can cause lost wakeups (or missed signals) while waiting to accept an rfcomm socket connection. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/rfcomm/sock.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 8f01e6b..482722b 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -485,11 +485,6 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f lock_sock(sk); - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; - goto done; - } - if (sk->sk_type != SOCK_STREAM) { err = -EINVAL; goto done; @@ -501,19 +496,20 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f /* Wait for an incoming connection. (wake-one). */ add_wait_queue_exclusive(sk_sleep(sk), &wait); - while (!(nsk = bt_accept_dequeue(sk, newsock))) { + while (1) { set_current_state(TASK_INTERRUPTIBLE); - if (!timeo) { - err = -EAGAIN; + + if (sk->sk_state != BT_LISTEN) { + err = -EBADFD; break; } - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock(sk); + nsk = bt_accept_dequeue(sk, newsock); + if (nsk) + break; - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; + if (!timeo) { + err = -EAGAIN; break; } @@ -521,8 +517,12 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f err = sock_intr_errno(timeo); break; } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) -- cgit v1.1 From 9be4e3fbf2d3603e7a7010ede0697166738a788b Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:10:46 -0400 Subject: Bluetooth: Fix lost wakeups waiting for sock state change Fix race conditions which can cause lost wakeups while waiting for sock state to change. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/af_bluetooth.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 8add9b4..117e0d1 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -494,9 +494,8 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) BT_DBG("sk %p", sk); add_wait_queue(sk_sleep(sk), &wait); + set_current_state(TASK_INTERRUPTIBLE); while (sk->sk_state != state) { - set_current_state(TASK_INTERRUPTIBLE); - if (!timeo) { err = -EINPROGRESS; break; @@ -510,12 +509,13 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); + set_current_state(TASK_INTERRUPTIBLE); err = sock_error(sk); if (err) break; } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); return err; } -- cgit v1.1 From f9a3c20aa07462108fc6fd759dea956053f020bb Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:10:52 -0400 Subject: Bluetooth: l2cap: Fix lost wakeups waiting to accept socket Fix race conditions which can cause lost wakeups (or misssed signals) while waiting to accept an l2cap socket connection. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_sock.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 5c36b3e..7d713b1 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -235,30 +235,26 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; - goto done; - } - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); BT_DBG("sk %p timeo %ld", sk, timeo); /* Wait for an incoming connection. (wake-one). */ add_wait_queue_exclusive(sk_sleep(sk), &wait); - while (!(nsk = bt_accept_dequeue(sk, newsock))) { + while (1) { set_current_state(TASK_INTERRUPTIBLE); - if (!timeo) { - err = -EAGAIN; + + if (sk->sk_state != BT_LISTEN) { + err = -EBADFD; break; } - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + nsk = bt_accept_dequeue(sk, newsock); + if (nsk) + break; - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; + if (!timeo) { + err = -EAGAIN; break; } @@ -266,8 +262,12 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl err = sock_intr_errno(timeo); break; } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) -- cgit v1.1 From 552b0d3cb9ff648aa503011ef50ca24019cd0f5f Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:11:01 -0400 Subject: Bluetooth: sco: Fix lost wakeups waiting to accept socket Fix race conditions which can cause lost wakeups (or missed signals) while waiting to accept a sco socket connection. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/sco.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 4c3621b..8270f05 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -564,30 +564,26 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag lock_sock(sk); - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; - goto done; - } - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); BT_DBG("sk %p timeo %ld", sk, timeo); /* Wait for an incoming connection. (wake-one). */ add_wait_queue_exclusive(sk_sleep(sk), &wait); - while (!(ch = bt_accept_dequeue(sk, newsock))) { + while (1) { set_current_state(TASK_INTERRUPTIBLE); - if (!timeo) { - err = -EAGAIN; + + if (sk->sk_state != BT_LISTEN) { + err = -EBADFD; break; } - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock(sk); + ch = bt_accept_dequeue(sk, newsock); + if (ch) + break; - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; + if (!timeo) { + err = -EAGAIN; break; } @@ -595,8 +591,12 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag err = sock_intr_errno(timeo); break; } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) -- cgit v1.1 From 38d57555616afcdad7381b02b523d494327494cd Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:11:07 -0400 Subject: Bluetooth: bnep: Fix lost wakeup of session thread Fix race condition which can result in missing the wakeup intended to stop the session thread. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/bnep/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index ca39fcf..7e8ff3c 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -484,9 +484,11 @@ static int bnep_session(void *arg) init_waitqueue_entry(&wait, current); add_wait_queue(sk_sleep(sk), &wait); - while (!kthread_should_stop()) { + while (1) { set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; /* RX */ while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); @@ -504,7 +506,7 @@ static int bnep_session(void *arg) schedule(); } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); /* Cleanup session */ -- cgit v1.1 From 3a3f5c7df55a1294c9e6e2d0b8cea604b137438f Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:11:10 -0400 Subject: Bluetooth: cmtp: Fix lost wakeup of session thread Fix race condition which can result in missing the wakeup intended to stop the session thread. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/cmtp/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index c5b11af..2eb854a 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -292,9 +292,11 @@ static int cmtp_session(void *arg) init_waitqueue_entry(&wait, current); add_wait_queue(sk_sleep(sk), &wait); - while (!kthread_should_stop()) { + while (1) { set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; if (sk->sk_state != BT_CONNECTED) break; @@ -307,7 +309,7 @@ static int cmtp_session(void *arg) schedule(); } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); down_write(&cmtp_session_sem); -- cgit v1.1 From a71a0cf4e9cdb1c43843977a1efc43f96f6efc21 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Mon, 25 Jul 2011 18:36:26 -0400 Subject: Bluetooth: l2cap: Fix lost wakeup waiting for ERTM acks Fix race condition which can result in missing wakeup during l2cap socket shutdown. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 3204ba8..b3bdb48 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1159,9 +1159,8 @@ int __l2cap_wait_ack(struct sock *sk) int timeo = HZ/5; add_wait_queue(sk_sleep(sk), &wait); - while ((chan->unacked_frames > 0 && chan->conn)) { - set_current_state(TASK_INTERRUPTIBLE); - + set_current_state(TASK_INTERRUPTIBLE); + while (chan->unacked_frames > 0 && chan->conn) { if (!timeo) timeo = HZ/5; @@ -1173,6 +1172,7 @@ int __l2cap_wait_ack(struct sock *sk) release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); + set_current_state(TASK_INTERRUPTIBLE); err = sock_error(sk); if (err) -- cgit v1.1 From 6be6b11f006840ba7d8d4b959b3fa0c522f8468a Mon Sep 17 00:00:00 2001 From: Chen Ganir Date: Thu, 28 Jul 2011 15:42:09 +0300 Subject: Bluetooth: Fixed wrong L2CAP Sock timer value L2CAP connection timeout needs to be assigned as miliseconds and not as jiffies. Signed-off-by: Chen Ganir Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 7d713b1..61f1f62 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -993,7 +993,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p INIT_LIST_HEAD(&bt_sk(sk)->accept_q); sk->sk_destruct = l2cap_sock_destruct; - sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT); + sk->sk_sndtimeo = L2CAP_CONN_TIMEOUT; sock_reset_flag(sk, SOCK_ZAPPED); -- cgit v1.1 From 7bdb8a5cf17f66614a9897645efcd4ccc27535ee Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Tue, 26 Jul 2011 22:46:54 +0200 Subject: Bluetooth: Don't use cmd_timer to timeout HCI reset command No command should be send before Command Complete event for HCI reset is received. This fix regression introduced by commit 6bd32326cda(Bluetooth: Use proper timer for hci command timout) for chips whose reset command takes longer to complete (e.g. CSR) resulting in next command being send before HCI reset completed. Signed-off-by: Szymon Janc Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index fca62dc..56943ad 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1209,7 +1209,6 @@ static void hci_cmd_timer(unsigned long arg) BT_ERR("%s command tx timeout", hdev->name); atomic_set(&hdev->cmd_cnt, 1); - clear_bit(HCI_RESET, &hdev->flags); tasklet_schedule(&hdev->cmd_task); } @@ -2408,7 +2407,10 @@ static void hci_cmd_task(unsigned long arg) if (hdev->sent_cmd) { atomic_dec(&hdev->cmd_cnt); hci_send_frame(skb); - mod_timer(&hdev->cmd_timer, + if (test_bit(HCI_RESET, &hdev->flags)) + del_timer(&hdev->cmd_timer); + else + mod_timer(&hdev->cmd_timer, jiffies + msecs_to_jiffies(HCI_CMD_TIMEOUT)); } else { skb_queue_head(&hdev->cmd_q, skb); -- cgit v1.1 From 751c10a56802513a6b057c8cf1552cecc1c9afde Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:41:35 -0400 Subject: Bluetooth: bnep: Fix deadlock in session deletion Commit f4d7cd4a4c introduced the usage of kthread API. kthread_stop is a blocking function which returns only when the thread exits. In this case, the thread can't exit because it's waiting for the write lock, which is being held by bnep_del_connection() which is waiting for the thread to exit -- deadlock. Use atomic_t/wake_up_process instead to signal to the thread to exit. Signed-off-by: Jaikumar Ganesh Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/bnep/bnep.h | 1 + net/bluetooth/bnep/core.c | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index 8e6c061..e7ee531 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -155,6 +155,7 @@ struct bnep_session { unsigned int role; unsigned long state; unsigned long flags; + atomic_t terminate; struct task_struct *task; struct ethhdr eh; diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 7e8ff3c..d9edfe8 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -487,7 +487,7 @@ static int bnep_session(void *arg) while (1) { set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) + if (atomic_read(&s->terminate)) break; /* RX */ while ((skb = skb_dequeue(&sk->sk_receive_queue))) { @@ -642,9 +642,10 @@ int bnep_del_connection(struct bnep_conndel_req *req) down_read(&bnep_session_sem); s = __bnep_get_session(req->dst); - if (s) - kthread_stop(s->task); - else + if (s) { + atomic_inc(&s->terminate); + wake_up_process(s->task); + } else err = -ENOENT; up_read(&bnep_session_sem); -- cgit v1.1 From 7176522cdca1f0b78a1434b41761f0334511822a Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:44:21 -0400 Subject: Bluetooth: cmtp: Fix deadlock in session deletion Commit fada4ac339 introduced the usage of kthread API. kthread_stop is a blocking function which returns only when the thread exits. In this case, the thread can't exit because it's waiting for the write lock, which is being held by cmtp_del_connection() which is waiting for the thread to exit -- deadlock. Revert cmtp_reset_ctr to its original behavior: non-blocking signalling for the session to terminate. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/cmtp/capi.c | 3 ++- net/bluetooth/cmtp/cmtp.h | 1 + net/bluetooth/cmtp/core.c | 5 +++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 040f67b..50f0d13 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -386,7 +386,8 @@ static void cmtp_reset_ctr(struct capi_ctr *ctrl) capi_ctr_down(ctrl); - kthread_stop(session->task); + atomic_inc(&session->terminate); + wake_up_process(session->task); } static void cmtp_register_appl(struct capi_ctr *ctrl, __u16 appl, capi_register_params *rp) diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h index db43b54..c32638d 100644 --- a/net/bluetooth/cmtp/cmtp.h +++ b/net/bluetooth/cmtp/cmtp.h @@ -81,6 +81,7 @@ struct cmtp_session { char name[BTNAMSIZ]; + atomic_t terminate; struct task_struct *task; wait_queue_head_t wait; diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 2eb854a..42cb2f4 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -295,7 +295,7 @@ static int cmtp_session(void *arg) while (1) { set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) + if (atomic_read(&session->terminate)) break; if (sk->sk_state != BT_CONNECTED) break; @@ -416,7 +416,8 @@ int cmtp_del_connection(struct cmtp_conndel_req *req) skb_queue_purge(&session->transmit); /* Stop session thread */ - kthread_stop(session->task); + atomic_inc(&session->terminate); + wake_up_process(session->task); } else err = -ENOENT; -- cgit v1.1 From e9d5cb541b22aa651edc29990092ec5f8174cd39 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:51:26 -0400 Subject: Bluetooth: hidp: Fix session cleanup on failed conn add Once the session thread is running, cleanup must be handled by the session thread only. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hidp/core.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 43b4c2d..7e19a01 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1044,8 +1044,12 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, } err = hid_add_device(session->hid); - if (err < 0) - goto err_add_device; + if (err < 0) { + atomic_inc(&session->terminate); + wake_up_process(session->task); + up_write(&hidp_session_sem); + return err; + } if (session->input) { hidp_send_ctrl_message(session, @@ -1059,12 +1063,6 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, up_write(&hidp_session_sem); return 0; -err_add_device: - hid_destroy_device(session->hid); - session->hid = NULL; - atomic_inc(&session->terminate); - wake_up_process(session->task); - unlink: hidp_del_timer(session); -- cgit v1.1 From 1c97e94c0b7c56319754ee6f9ccd2e93fe1ee2b3 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:51:34 -0400 Subject: Bluetooth: hidp: Fix memory leak of cached report descriptor Free the cached HID report descriptor on thread terminate. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hidp/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 7e19a01..26f0d10 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -764,6 +764,7 @@ static int hidp_session(void *arg) up_write(&hidp_session_sem); + kfree(session->rd_data); kfree(session); return 0; } -- cgit v1.1 From 615aedd6e5add8104f031b0d547285652d04d330 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:51:50 -0400 Subject: Bluetooth: hidp: Only free input device if failed register When an hidp connection is added for a boot protocol input device, only free the allocated device if device registration fails. Subsequent failures should only unregister the device (the input device api documents that unregister will also free the allocated device). Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hidp/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 26f0d10..a859f90 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -842,6 +842,8 @@ static int hidp_setup_input(struct hidp_session *session, err = input_register_device(input); if (err < 0) { + input_free_device(input); + session->input = NULL; hci_conn_put_device(session->conn); return err; } @@ -1089,7 +1091,6 @@ purge: failed: up_write(&hidp_session_sem); - input_free_device(session->input); kfree(session); return err; } -- cgit v1.1 From ff062ea109217329b88693bc9081da893eb8b71b Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:52:01 -0400 Subject: Bluetooth: hidp: Don't release device ref if never held When an hidp connection is added for a boot protocol input device, don't release a device reference that was never acquired. The device reference is acquired when the session is linked to the session list (which hasn't happened yet when hidp_setup_input is called). Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hidp/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index a859f90..fb68f34 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -844,7 +844,6 @@ static int hidp_setup_input(struct hidp_session *session, if (err < 0) { input_free_device(input); session->input = NULL; - hci_conn_put_device(session->conn); return err; } -- cgit v1.1 From 687beaa0d1d937c327e2f97b4b4fa6c23ca70624 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:53:52 -0400 Subject: Bluetooth: cmtp: Fix session cleanup on failed conn add Once the session thread is running, cleanup must be handled by the session thread only. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/cmtp/core.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 42cb2f4..521baa4 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -382,16 +382,17 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock) if (!(session->flags & (1 << CMTP_LOOPBACK))) { err = cmtp_attach_device(session); - if (err < 0) - goto detach; + if (err < 0) { + atomic_inc(&session->terminate); + wake_up_process(session->task); + up_write(&cmtp_session_sem); + return err; + } } up_write(&cmtp_session_sem); return 0; -detach: - cmtp_detach_device(session); - unlink: __cmtp_unlink_session(session); -- cgit v1.1 From 567b20e02bc1b2bcd69e8bfc022dec3da3fefb89 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 12 Jul 2011 19:05:29 +0800 Subject: usb: gadget: s3c2410_udc: fix unterminated platform_device_id table platform_device_id structures need a NULL terminating entry, add it. Signed-off-by: Axel Lin Signed-off-by: Felipe Balbi --- drivers/usb/gadget/s3c2410_udc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/s3c2410_udc.c b/drivers/usb/gadget/s3c2410_udc.c index 85c1b0d..8d31848 100644 --- a/drivers/usb/gadget/s3c2410_udc.c +++ b/drivers/usb/gadget/s3c2410_udc.c @@ -2060,6 +2060,7 @@ static int s3c2410_udc_resume(struct platform_device *pdev) static const struct platform_device_id s3c_udc_ids[] = { { "s3c2410-usbgadget", }, { "s3c2440-usbgadget", }, + { } }; MODULE_DEVICE_TABLE(platform, s3c_udc_ids); -- cgit v1.1 From aba1350fdac7cb52f86e6818addb26d03b3ef9bc Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 19 Jul 2011 21:47:01 +0200 Subject: usb: gadget: fusb300: remove #if 0 block The code in this block is unused and the Author is fine with removing: | These functions were used to debug unstable hw fifo while developing | fusb300. It's much more stable now. | So these functions can be removed. Cc: "Wendy Yuan-Hsin Chen" Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Felipe Balbi --- drivers/usb/gadget/fusb300_udc.c | 101 --------------------------------------- 1 file changed, 101 deletions(-) diff --git a/drivers/usb/gadget/fusb300_udc.c b/drivers/usb/gadget/fusb300_udc.c index 24a9243..4ec888f 100644 --- a/drivers/usb/gadget/fusb300_udc.c +++ b/drivers/usb/gadget/fusb300_udc.c @@ -609,107 +609,6 @@ void fusb300_rdcxf(struct fusb300 *fusb300, } } -#if 0 -static void fusb300_dbg_fifo(struct fusb300_ep *ep, - u8 entry, u16 length) -{ - u32 reg; - u32 i = 0; - u32 j = 0; - - reg = ioread32(ep->fusb300->reg + FUSB300_OFFSET_GTM); - reg &= ~(FUSB300_GTM_TST_EP_ENTRY(0xF) | - FUSB300_GTM_TST_EP_NUM(0xF) | FUSB300_GTM_TST_FIFO_DEG); - reg |= (FUSB300_GTM_TST_EP_ENTRY(entry) | - FUSB300_GTM_TST_EP_NUM(ep->epnum) | FUSB300_GTM_TST_FIFO_DEG); - iowrite32(reg, ep->fusb300->reg + FUSB300_OFFSET_GTM); - - for (i = 0; i < (length >> 2); i++) { - if (i * 4 == 1024) - break; - reg = ioread32(ep->fusb300->reg + - FUSB300_OFFSET_BUFDBG_START + i * 4); - printk(KERN_DEBUG" 0x%-8x", reg); - j++; - if ((j % 4) == 0) - printk(KERN_DEBUG "\n"); - } - - if (length % 4) { - reg = ioread32(ep->fusb300->reg + - FUSB300_OFFSET_BUFDBG_START + i * 4); - printk(KERN_DEBUG " 0x%x\n", reg); - } - - if ((j % 4) != 0) - printk(KERN_DEBUG "\n"); - - fusb300_disable_bit(ep->fusb300, FUSB300_OFFSET_GTM, - FUSB300_GTM_TST_FIFO_DEG); -} - -static void fusb300_cmp_dbg_fifo(struct fusb300_ep *ep, - u8 entry, u16 length, u8 *golden) -{ - u32 reg; - u32 i = 0; - u32 golden_value; - u8 *tmp; - - tmp = golden; - - printk(KERN_DEBUG "fusb300_cmp_dbg_fifo (entry %d) : start\n", entry); - - reg = ioread32(ep->fusb300->reg + FUSB300_OFFSET_GTM); - reg &= ~(FUSB300_GTM_TST_EP_ENTRY(0xF) | - FUSB300_GTM_TST_EP_NUM(0xF) | FUSB300_GTM_TST_FIFO_DEG); - reg |= (FUSB300_GTM_TST_EP_ENTRY(entry) | - FUSB300_GTM_TST_EP_NUM(ep->epnum) | FUSB300_GTM_TST_FIFO_DEG); - iowrite32(reg, ep->fusb300->reg + FUSB300_OFFSET_GTM); - - for (i = 0; i < (length >> 2); i++) { - if (i * 4 == 1024) - break; - golden_value = *tmp | *(tmp + 1) << 8 | - *(tmp + 2) << 16 | *(tmp + 3) << 24; - - reg = ioread32(ep->fusb300->reg + - FUSB300_OFFSET_BUFDBG_START + i*4); - - if (reg != golden_value) { - printk(KERN_DEBUG "0x%x : ", (u32)(ep->fusb300->reg + - FUSB300_OFFSET_BUFDBG_START + i*4)); - printk(KERN_DEBUG " golden = 0x%x, reg = 0x%x\n", - golden_value, reg); - } - tmp += 4; - } - - switch (length % 4) { - case 1: - golden_value = *tmp; - case 2: - golden_value = *tmp | *(tmp + 1) << 8; - case 3: - golden_value = *tmp | *(tmp + 1) << 8 | *(tmp + 2) << 16; - default: - break; - - reg = ioread32(ep->fusb300->reg + FUSB300_OFFSET_BUFDBG_START + i*4); - if (reg != golden_value) { - printk(KERN_DEBUG "0x%x:", (u32)(ep->fusb300->reg + - FUSB300_OFFSET_BUFDBG_START + i*4)); - printk(KERN_DEBUG " golden = 0x%x, reg = 0x%x\n", - golden_value, reg); - } - } - - printk(KERN_DEBUG "fusb300_cmp_dbg_fifo : end\n"); - fusb300_disable_bit(ep->fusb300, FUSB300_OFFSET_GTM, - FUSB300_GTM_TST_FIFO_DEG); -} -#endif - static void fusb300_rdfifo(struct fusb300_ep *ep, struct fusb300_request *req, u32 length) -- cgit v1.1 From 6a22158c596c1531b143c884d479285ef90608d1 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 19 Jul 2011 20:21:52 +0200 Subject: usb: gadget: composite: fix bMaxPacketSize for SuperSpeed For bMaxPacketSize0 we usually take what is specified in ep0->maxpacket. This is fine in most cases, however on SuperSpeed bMaxPacketSize0 specifies the exponent instead of the actual size in bytes. The only valid value on SS is 9 which denotes 512 bytes. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Felipe Balbi --- drivers/usb/gadget/composite.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 5ef8779..aef4741 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1079,10 +1079,12 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) cdev->desc.bMaxPacketSize0 = cdev->gadget->ep0->maxpacket; if (gadget_is_superspeed(gadget)) { - if (gadget->speed >= USB_SPEED_SUPER) + if (gadget->speed >= USB_SPEED_SUPER) { cdev->desc.bcdUSB = cpu_to_le16(0x0300); - else + cdev->desc.bMaxPacketSize0 = 9; + } else { cdev->desc.bcdUSB = cpu_to_le16(0x0210); + } } value = min(w_length, (u16) sizeof cdev->desc); -- cgit v1.1 From 74c6f3a42a5af424dd954916a5e69d00271b943a Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Sun, 17 Jul 2011 18:28:00 +0300 Subject: usb: musb: tusb6010_omap: fix build failure: error: 'musb' undeclared CC drivers/usb/musb/tusb6010_omap.o drivers/usb/musb/tusb6010_omap.c: In function 'tusb_omap_use_shared_dmareq': drivers/usb/musb/tusb6010_omap.c:92: error: 'musb' undeclared (first use in this function) drivers/usb/musb/tusb6010_omap.c:92: error: (Each undeclared identifier is reported only once drivers/usb/musb/tusb6010_omap.c:92: error: for each function it appears in.) Signed-off-by: Sergei Trofimovich Signed-off-by: Felipe Balbi --- drivers/usb/musb/tusb6010_omap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/tusb6010_omap.c b/drivers/usb/musb/tusb6010_omap.c index c784e6c..07c8a73 100644 --- a/drivers/usb/musb/tusb6010_omap.c +++ b/drivers/usb/musb/tusb6010_omap.c @@ -89,7 +89,7 @@ static inline int tusb_omap_use_shared_dmareq(struct tusb_omap_dma_ch *chdat) u32 reg = musb_readl(chdat->tbase, TUSB_DMA_EP_MAP); if (reg != 0) { - dev_dbg(musb->controller, "ep%i dmareq0 is busy for ep%i\n", + dev_dbg(chdat->musb->controller, "ep%i dmareq0 is busy for ep%i\n", chdat->epnum, reg & 0xf); return -EAGAIN; } -- cgit v1.1 From 26e5c3e227d15a44402e1c9ab817fe48142b4b99 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Mon, 18 Jul 2011 18:38:47 +0530 Subject: usb: musb: fix Kconfig After 622859634 (usb: musb: drop a gigantic amount of ifdeferry): - USB_GADGET_MUSB_HDRC is no longer selectable because it depends on the removed USB_MUSB_PERIPHERAL and USB_MUSB_OTG options - The Kconfig comment still says "Enable Host or Gadget support to see Inventra options", even though you now need to enable both of them to see Inventra options. Fix the dependency and drop the anyway unnecessary comment. Signed-off-by: Rabin Vincent Signed-off-by: Felipe Balbi --- drivers/usb/gadget/Kconfig | 2 +- drivers/usb/musb/Kconfig | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 44b6b40..5a084b9 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -310,7 +310,7 @@ config USB_PXA_U2O # musb builds in ../musb along with host support config USB_GADGET_MUSB_HDRC tristate "Inventra HDRC USB Peripheral (TI, ADI, ...)" - depends on USB_MUSB_HDRC && (USB_MUSB_PERIPHERAL || USB_MUSB_OTG) + depends on USB_MUSB_HDRC select USB_GADGET_DUALSPEED help This OTG-capable silicon IP is used in dual designs including diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig index 6192b45..fc34b8b 100644 --- a/drivers/usb/musb/Kconfig +++ b/drivers/usb/musb/Kconfig @@ -3,9 +3,6 @@ # for silicon based on Mentor Graphics INVENTRA designs # -comment "Enable Host or Gadget support to see Inventra options" - depends on !USB && USB_GADGET=n - # (M)HDRC = (Multipoint) Highspeed Dual-Role Controller config USB_MUSB_HDRC depends on USB && USB_GADGET -- cgit v1.1 From 71964b9a0c06f2804be3b6ff47fab07a9468ecb4 Mon Sep 17 00:00:00 2001 From: Sebastian Bauer Date: Thu, 21 Jul 2011 15:40:07 +0200 Subject: usb: gadget: hid: don't STALL when processing a HID Descriptor request This is a patch to fix an issue with the HID gadget which, at the moment, returns STALL on a HID descriptor request. Essentially, the patch changes the hid gadget such that a request for the HID descriptor is handled by copying the descriptor into the response buffer, rather than falling through the default case, in which the request is answered by a STALL. Signed-off-by: Sebastian Bauer Acked-by: Peter Korsgaard Signed-off-by: Felipe Balbi --- drivers/usb/gadget/f_hid.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/gadget/f_hid.c b/drivers/usb/gadget/f_hid.c index 403a48b..83a266b 100644 --- a/drivers/usb/gadget/f_hid.c +++ b/drivers/usb/gadget/f_hid.c @@ -367,6 +367,13 @@ static int hidg_setup(struct usb_function *f, case ((USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_INTERFACE) << 8 | USB_REQ_GET_DESCRIPTOR): switch (value >> 8) { + case HID_DT_HID: + VDBG(cdev, "USB_REQ_GET_DESCRIPTOR: HID\n"); + length = min_t(unsigned short, length, + hidg_desc.bLength); + memcpy(req->buf, &hidg_desc, length); + goto respond; + break; case HID_DT_REPORT: VDBG(cdev, "USB_REQ_GET_DESCRIPTOR: REPORT\n"); length = min_t(unsigned short, length, -- cgit v1.1 From 15154962f777e4ab38adb7641ccae92194c9a96b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 28 Jul 2011 22:59:53 +0800 Subject: usb: host: ehci-omap: fix .remove and failure handling path of .probe(v1) Obviously, disabling & put regulator and iounmap(hcd->regs) are missed in .remove and failure handling path of .probe, so add them. Signed-off-by: Ming Lei Acked-by: Alan Stern Tested-by: Keshava Munegowda Signed-off-by: Felipe Balbi --- drivers/usb/host/ehci-omap.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/usb/host/ehci-omap.c b/drivers/usb/host/ehci-omap.c index 55a57c2..4524032 100644 --- a/drivers/usb/host/ehci-omap.c +++ b/drivers/usb/host/ehci-omap.c @@ -98,6 +98,18 @@ static void omap_ehci_soft_phy_reset(struct platform_device *pdev, u8 port) } } +static void disable_put_regulator( + struct ehci_hcd_omap_platform_data *pdata) +{ + int i; + + for (i = 0 ; i < OMAP3_HS_USB_PORTS ; i++) { + if (pdata->regulator[i]) { + regulator_disable(pdata->regulator[i]); + regulator_put(pdata->regulator[i]); + } + } +} /* configure so an HC device and id are always provided */ /* always called with process context; sleeping is OK */ @@ -231,9 +243,11 @@ err_add_hcd: omap_usbhs_disable(dev); err_enable: + disable_put_regulator(pdata); usb_put_hcd(hcd); err_io: + iounmap(regs); return ret; } @@ -253,6 +267,8 @@ static int ehci_hcd_omap_remove(struct platform_device *pdev) usb_remove_hcd(hcd); omap_usbhs_disable(dev); + disable_put_regulator(dev->platform_data); + iounmap(hcd->regs); usb_put_hcd(hcd); return 0; } -- cgit v1.1 From 93e098a8fc02c579875e64001f7a511b7e75a16c Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 20 Jul 2011 17:09:34 -0700 Subject: usb: musb: fix oops on musb_gadget_pullup an 'unhandled fault' is causes when a gadget driver calls usb_gadget_connect() while the USB cable isn't plugged into the OTG port. the fault is caused by an access to MUSB's memory space while its clock is turned off due to pm_runtime kicking in. in order to fix the fault, we enclose musb_gadget_pullup() with pm_runtime_get_sync() ... pm_runtime_put() calls to be sure we will always reach that path with clock turned on. [ balbi@ti.com : simplified commit log; removed few things which didn't belong there ] Cc: stable@kernel.org Reported-by: Zach Pfeffer Signed-off-by: John Stultz Signed-off-by: Felipe Balbi --- drivers/usb/musb/musb_gadget.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index b67a062..8c41a2e 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1698,6 +1698,8 @@ static int musb_gadget_pullup(struct usb_gadget *gadget, int is_on) is_on = !!is_on; + pm_runtime_get_sync(musb->controller); + /* NOTE: this assumes we are sensing vbus; we'd rather * not pullup unless the B-session is active. */ @@ -1707,6 +1709,9 @@ static int musb_gadget_pullup(struct usb_gadget *gadget, int is_on) musb_pullup(musb, is_on); } spin_unlock_irqrestore(&musb->lock, flags); + + pm_runtime_put(musb->controller); + return 0; } -- cgit v1.1 From d366d39bab562545ccb4a5931d62d0fd9e6a8ffc Mon Sep 17 00:00:00 2001 From: Per Forlin Date: Tue, 2 Aug 2011 17:33:39 +0200 Subject: usb: musb: ux500: set dma config for both src and dst The dma driver requires both src and dst to be set. This fix is needed in order to run gadget mass storage. Patch is verified on snowball. Signed-off-by: Per Forlin Acked-by: Mian Yousaf Kaukab Acked-by: Linus Walleij Signed-off-by: Felipe Balbi --- drivers/usb/musb/ux500_dma.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/usb/musb/ux500_dma.c b/drivers/usb/musb/ux500_dma.c index cecace4..2313475 100644 --- a/drivers/usb/musb/ux500_dma.c +++ b/drivers/usb/musb/ux500_dma.c @@ -133,15 +133,13 @@ static bool ux500_configure_channel(struct dma_channel *channel, DMA_SLAVE_BUSWIDTH_4_BYTES; slave_conf.direction = direction; - if (direction == DMA_FROM_DEVICE) { - slave_conf.src_addr = usb_fifo_addr; - slave_conf.src_addr_width = addr_width; - slave_conf.src_maxburst = 16; - } else { - slave_conf.dst_addr = usb_fifo_addr; - slave_conf.dst_addr_width = addr_width; - slave_conf.dst_maxburst = 16; - } + slave_conf.src_addr = usb_fifo_addr; + slave_conf.src_addr_width = addr_width; + slave_conf.src_maxburst = 16; + slave_conf.dst_addr = usb_fifo_addr; + slave_conf.dst_addr_width = addr_width; + slave_conf.dst_maxburst = 16; + dma_chan->device->device_control(dma_chan, DMA_SLAVE_CONFIG, (unsigned long) &slave_conf); -- cgit v1.1 From afbd0749c0507d5fea980b3bfa76efc43af83d60 Mon Sep 17 00:00:00 2001 From: Per Forlin Date: Wed, 3 Aug 2011 14:22:17 +0200 Subject: usb: musb: ux500: replace missing DBG with dev_dbg ux500_dma.c fail to compile becase DBG has been removed from musb_debug. Use dev_dbg for all prints. Cc: stable@vger.kernel.org Signed-off-by: Per Forlin Acked-by: Mian Yousaf Kaukab Signed-off-by: Felipe Balbi --- drivers/usb/musb/ux500_dma.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/usb/musb/ux500_dma.c b/drivers/usb/musb/ux500_dma.c index 2313475..ef4333f 100644 --- a/drivers/usb/musb/ux500_dma.c +++ b/drivers/usb/musb/ux500_dma.c @@ -65,7 +65,8 @@ static void ux500_tx_work(struct work_struct *data) struct musb *musb = hw_ep->musb; unsigned long flags; - DBG(4, "DMA tx transfer done on hw_ep=%d\n", hw_ep->epnum); + dev_dbg(musb->controller, "DMA tx transfer done on hw_ep=%d\n", + hw_ep->epnum); spin_lock_irqsave(&musb->lock, flags); ux500_channel->channel.actual_len = ux500_channel->cur_len; @@ -84,7 +85,8 @@ static void ux500_rx_work(struct work_struct *data) struct musb *musb = hw_ep->musb; unsigned long flags; - DBG(4, "DMA rx transfer done on hw_ep=%d\n", hw_ep->epnum); + dev_dbg(musb->controller, "DMA rx transfer done on hw_ep=%d\n", + hw_ep->epnum); spin_lock_irqsave(&musb->lock, flags); ux500_channel->channel.actual_len = ux500_channel->cur_len; @@ -116,9 +118,11 @@ static bool ux500_configure_channel(struct dma_channel *channel, enum dma_slave_buswidth addr_width; dma_addr_t usb_fifo_addr = (MUSB_FIFO_OFFSET(hw_ep->epnum) + ux500_channel->controller->phy_base); + struct musb *musb = ux500_channel->controller->private_data; - DBG(4, "packet_sz=%d, mode=%d, dma_addr=0x%x, len=%d is_tx=%d\n", - packet_sz, mode, dma_addr, len, ux500_channel->is_tx); + dev_dbg(musb->controller, + "packet_sz=%d, mode=%d, dma_addr=0x%x, len=%d is_tx=%d\n", + packet_sz, mode, dma_addr, len, ux500_channel->is_tx); ux500_channel->cur_len = len; @@ -164,6 +168,7 @@ static struct dma_channel *ux500_dma_channel_allocate(struct dma_controller *c, struct ux500_dma_controller *controller = container_of(c, struct ux500_dma_controller, controller); struct ux500_dma_channel *ux500_channel = NULL; + struct musb *musb = controller->private_data; u8 ch_num = hw_ep->epnum - 1; u32 max_ch; @@ -190,7 +195,7 @@ static struct dma_channel *ux500_dma_channel_allocate(struct dma_controller *c, ux500_channel->hw_ep = hw_ep; ux500_channel->is_allocated = 1; - DBG(7, "hw_ep=%d, is_tx=0x%x, channel=%d\n", + dev_dbg(musb->controller, "hw_ep=%d, is_tx=0x%x, channel=%d\n", hw_ep->epnum, is_tx, ch_num); return &(ux500_channel->channel); @@ -199,8 +204,9 @@ static struct dma_channel *ux500_dma_channel_allocate(struct dma_controller *c, static void ux500_dma_channel_release(struct dma_channel *channel) { struct ux500_dma_channel *ux500_channel = channel->private_data; + struct musb *musb = ux500_channel->controller->private_data; - DBG(7, "channel=%d\n", ux500_channel->ch_num); + dev_dbg(musb->controller, "channel=%d\n", ux500_channel->ch_num); if (ux500_channel->is_allocated) { ux500_channel->is_allocated = 0; @@ -250,8 +256,8 @@ static int ux500_dma_channel_abort(struct dma_channel *channel) void __iomem *epio = musb->endpoints[ux500_channel->hw_ep->epnum].regs; u16 csr; - DBG(4, "channel=%d, is_tx=%d\n", ux500_channel->ch_num, - ux500_channel->is_tx); + dev_dbg(musb->controller, "channel=%d, is_tx=%d\n", + ux500_channel->ch_num, ux500_channel->is_tx); if (channel->status == MUSB_DMA_STATUS_BUSY) { if (ux500_channel->is_tx) { -- cgit v1.1 From f847a79ab3c1faca3022061045cd22e4678c1b1c Mon Sep 17 00:00:00 2001 From: Per Forlin Date: Wed, 3 Aug 2011 15:39:15 +0200 Subject: usb: musb: cppi: fix build errors due to DBG and missing musb variable Replace DBG with dev_dbg and fix invalid access of musb->controller. With this patch cppi_dma builds successfully. Cc: Signed-off-by: Per Forlin Signed-off-by: Felipe Balbi --- drivers/usb/musb/cppi_dma.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/usb/musb/cppi_dma.c b/drivers/usb/musb/cppi_dma.c index 149f3f3..318fb4e 100644 --- a/drivers/usb/musb/cppi_dma.c +++ b/drivers/usb/musb/cppi_dma.c @@ -226,8 +226,10 @@ static int cppi_controller_stop(struct dma_controller *c) struct cppi *controller; void __iomem *tibase; int i; + struct musb *musb; controller = container_of(c, struct cppi, controller); + musb = controller->musb; tibase = controller->tibase; /* DISABLE INDIVIDUAL CHANNEL Interrupts */ @@ -289,9 +291,11 @@ cppi_channel_allocate(struct dma_controller *c, u8 index; struct cppi_channel *cppi_ch; void __iomem *tibase; + struct musb *musb; controller = container_of(c, struct cppi, controller); tibase = controller->tibase; + musb = controller->musb; /* ep0 doesn't use DMA; remember cppi indices are 0..N-1 */ index = ep->epnum - 1; @@ -339,7 +343,8 @@ static void cppi_channel_release(struct dma_channel *channel) c = container_of(channel, struct cppi_channel, channel); tibase = c->controller->tibase; if (!c->hw_ep) - dev_dbg(musb->controller, "releasing idle DMA channel %p\n", c); + dev_dbg(c->controller->musb->controller, + "releasing idle DMA channel %p\n", c); else if (!c->transmit) core_rxirq_enable(tibase, c->index + 1); @@ -357,10 +362,11 @@ cppi_dump_rx(int level, struct cppi_channel *c, const char *tag) musb_ep_select(base, c->index + 1); - DBG(level, "RX DMA%d%s: %d left, csr %04x, " - "%08x H%08x S%08x C%08x, " - "B%08x L%08x %08x .. %08x" - "\n", + dev_dbg(c->controller->musb->controller, + "RX DMA%d%s: %d left, csr %04x, " + "%08x H%08x S%08x C%08x, " + "B%08x L%08x %08x .. %08x" + "\n", c->index, tag, musb_readl(c->controller->tibase, DAVINCI_RXCPPI_BUFCNT0_REG + 4 * c->index), @@ -387,10 +393,11 @@ cppi_dump_tx(int level, struct cppi_channel *c, const char *tag) musb_ep_select(base, c->index + 1); - DBG(level, "TX DMA%d%s: csr %04x, " - "H%08x S%08x C%08x %08x, " - "F%08x L%08x .. %08x" - "\n", + dev_dbg(c->controller->musb->controller, + "TX DMA%d%s: csr %04x, " + "H%08x S%08x C%08x %08x, " + "F%08x L%08x .. %08x" + "\n", c->index, tag, musb_readw(c->hw_ep->regs, MUSB_TXCSR), @@ -1022,6 +1029,7 @@ static bool cppi_rx_scan(struct cppi *cppi, unsigned ch) int i; dma_addr_t safe2ack; void __iomem *regs = rx->hw_ep->regs; + struct musb *musb = cppi->musb; cppi_dump_rx(6, rx, "/K"); -- cgit v1.1 From cf6808cb09e72fa7ee8713bf48219a2eb98da91b Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 3 Aug 2011 21:41:26 -0700 Subject: usb: gadget: renesas_usbhs: fix DMA build by including dma-mapping.h Include dma-mapping.h to fix build of the renesas_usbhs driver | CC drivers/usb/renesas_usbhs/mod_gadget.o | drivers/usb/renesas_usbhs/mod_gadget.c: In function 'usbhsg_dma_map': | drivers/usb/renesas_usbhs/mod_gadget.c:190: error: implicit declaration of function 'dma_map_single' | drivers/usb/renesas_usbhs/mod_gadget.c:192: error: implicit declaration of function 'dma_sync_single_for_device' | drivers/usb/renesas_usbhs/mod_gadget.c:196: error: implicit declaration of function 'dma_mapping_error' | drivers/usb/renesas_usbhs/mod_gadget.c: In function 'usbhsg_dma_unmap': | drivers/usb/renesas_usbhs/mod_gadget.c:217: error: implicit declaration of function 'dma_unmap_single' | drivers/usb/renesas_usbhs/mod_gadget.c:219: error: implicit declaration of function 'dma_sync_single_for_cpu' | make[5]: *** [drivers/usb/renesas_usbhs/mod_gadget.o] Error 1 | make[4]: *** [drivers/usb/renesas_usbhs] Error 2 Reported-by: Magnus Damm Signed-off-by: Kuninori Morimoto Signed-off-by: Felipe Balbi --- drivers/usb/renesas_usbhs/mod_gadget.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index ba79dbf..e7101dc 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -14,6 +14,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ +#include #include #include #include -- cgit v1.1 From 240a16e2cd831cb25361b1d1797bd04e8faf8b4f Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 5 Aug 2011 13:29:49 +0300 Subject: usb: musb: tusb6010: fix compilation earlier commits have broken compilation of tusb6010 glue layer, fix it. Signed-off-by: Felipe Balbi --- drivers/usb/musb/musb_core.h | 12 ++++++++---- drivers/usb/musb/musb_regs.h | 6 ++++-- drivers/usb/musb/tusb6010.c | 1 + drivers/usb/musb/tusb6010_omap.c | 1 + 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index 668eeef..b3c065a 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -172,7 +172,8 @@ enum musb_g_ep0_state { #endif /* TUSB mapping: "flat" plus ep0 special cases */ -#if defined(CONFIG_USB_MUSB_TUSB6010) +#if defined(CONFIG_USB_MUSB_TUSB6010) || \ + defined(CONFIG_USB_MUSB_TUSB6010_MODULE) #define musb_ep_select(_mbase, _epnum) \ musb_writeb((_mbase), MUSB_INDEX, (_epnum)) #define MUSB_EP_OFFSET MUSB_TUSB_OFFSET @@ -241,7 +242,8 @@ struct musb_hw_ep { void __iomem *fifo; void __iomem *regs; -#ifdef CONFIG_USB_MUSB_TUSB6010 +#if defined(CONFIG_USB_MUSB_TUSB6010) || \ + defined(CONFIG_USB_MUSB_TUSB6010_MODULE) void __iomem *conf; #endif @@ -258,7 +260,8 @@ struct musb_hw_ep { struct dma_channel *tx_channel; struct dma_channel *rx_channel; -#ifdef CONFIG_USB_MUSB_TUSB6010 +#if defined(CONFIG_USB_MUSB_TUSB6010) || \ + defined(CONFIG_USB_MUSB_TUSB6010_MODULE) /* TUSB has "asynchronous" and "synchronous" dma modes */ dma_addr_t fifo_async; dma_addr_t fifo_sync; @@ -356,7 +359,8 @@ struct musb { void __iomem *ctrl_base; void __iomem *mregs; -#ifdef CONFIG_USB_MUSB_TUSB6010 +#if defined(CONFIG_USB_MUSB_TUSB6010) || \ + defined(CONFIG_USB_MUSB_TUSB6010_MODULE) dma_addr_t async; dma_addr_t sync; void __iomem *sync_va; diff --git a/drivers/usb/musb/musb_regs.h b/drivers/usb/musb/musb_regs.h index 8241070..03f2655 100644 --- a/drivers/usb/musb/musb_regs.h +++ b/drivers/usb/musb/musb_regs.h @@ -234,7 +234,8 @@ #define MUSB_TESTMODE 0x0F /* 8 bit */ /* Get offset for a given FIFO from musb->mregs */ -#ifdef CONFIG_USB_MUSB_TUSB6010 +#if defined(CONFIG_USB_MUSB_TUSB6010) || \ + defined(CONFIG_USB_MUSB_TUSB6010_MODULE) #define MUSB_FIFO_OFFSET(epnum) (0x200 + ((epnum) * 0x20)) #else #define MUSB_FIFO_OFFSET(epnum) (0x20 + ((epnum) * 4)) @@ -295,7 +296,8 @@ #define MUSB_FLAT_OFFSET(_epnum, _offset) \ (0x100 + (0x10*(_epnum)) + (_offset)) -#ifdef CONFIG_USB_MUSB_TUSB6010 +#if defined(CONFIG_USB_MUSB_TUSB6010) || \ + defined(CONFIG_USB_MUSB_TUSB6010_MODULE) /* TUSB6010 EP0 configuration register is special */ #define MUSB_TUSB_OFFSET(_epnum, _offset) \ (0x10 + _offset) diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c index 9eec41f..ec14801 100644 --- a/drivers/usb/musb/tusb6010.c +++ b/drivers/usb/musb/tusb6010.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/musb/tusb6010_omap.c b/drivers/usb/musb/tusb6010_omap.c index 07c8a73..b67b4bc 100644 --- a/drivers/usb/musb/tusb6010_omap.c +++ b/drivers/usb/musb/tusb6010_omap.c @@ -20,6 +20,7 @@ #include #include "musb_core.h" +#include "tusb6010.h" #define to_chdat(c) ((struct tusb_omap_dma_ch *)(c)->private_data) -- cgit v1.1 From ad50c1b20ff27780afbb8bcd3d153393d360419e Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Fri, 5 Aug 2011 17:33:05 +0800 Subject: usb: musb: blackfin: include prefetch head file After the prefetch/list.h restructure, drivers need to explicitly include linux/prefetch.h in order to use the prefetch() function. Otherwise, the current driver fails to build: drivers/usb/musb/blackfin.c: In function 'musb_write_fifo': drivers/usb/musb/blackfin.c:43: error: implicit declaration of function 'prefetch' Signed-off-by: Bob Liu Signed-off-by: Felipe Balbi --- drivers/usb/musb/blackfin.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/musb/blackfin.c b/drivers/usb/musb/blackfin.c index ae8c396..5e7cfba 100644 --- a/drivers/usb/musb/blackfin.c +++ b/drivers/usb/musb/blackfin.c @@ -17,6 +17,7 @@ #include #include #include +#include #include -- cgit v1.1 From bb8070c29ca87ac0aa24e04a1207cc932f62258f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 5 Aug 2011 22:14:46 +0200 Subject: usb: gadget: f_phonet: unlock in error case Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Felipe Balbi --- drivers/usb/gadget/f_phonet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/f_phonet.c b/drivers/usb/gadget/f_phonet.c index 8f8d3f6..8f3eab1 100644 --- a/drivers/usb/gadget/f_phonet.c +++ b/drivers/usb/gadget/f_phonet.c @@ -434,6 +434,7 @@ static int pn_set_alt(struct usb_function *f, unsigned intf, unsigned alt) config_ep_by_speed(gadget, f, fp->out_ep)) { fp->in_ep->desc = NULL; fp->out_ep->desc = NULL; + spin_unlock(&port->lock); return -EINVAL; } usb_ep_enable(fp->out_ep); -- cgit v1.1 From 6193d6997c90535af8f8491fc0019f785a3322b0 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 10 Aug 2011 11:01:57 +0200 Subject: usb: musb: gadget: fix error path In case one "forgot" to load the receiver i.e. doing |modprobe omap2430 |modprobe musb_hdrc he ends up with: |musb-hdrc: version 6.0, ?dma?, otg (peripheral+host) |HS USB OTG: no transceiver configured |musb-hdrc musb-hdrc: musb_init_controller failed with status -19 |(NULL device *): gadget not registered. |Unable to handle kernel NULL pointer dereference at virtual address 0000001c |Internal error: Oops: 17 [#1] SMP |[] (sysfs_find_dirent+0x4/0x60) from [] (sysfs_get_dirent+0x28/0x78) |[] (sysfs_get_dirent+0x28/0x78) from [] (sysfs_unmerge_group+0x1c/0x90) |[] (sysfs_unmerge_group+0x1c/0x90) from [] (dpm_sysfs_remove+0x14/0x3c) |[] (dpm_sysfs_remove+0x14/0x3c) from [] (device_del+0x40/0x1b4) |[] (device_del+0x40/0x1b4) from [] (device_unregister+0xc/0x18) |[] (device_unregister+0xc/0x18) from [] (musb_free+0x24/0x88 [musb_hdrc]) |[] (musb_free+0x24/0x88 [musb_hdrc]) from [] (musb_probe+0xb50/0xe3c [musb_hdrc]) |[] (musb_probe+0xb50/0xe3c [musb_hdrc]) from [] (platform_drv_probe+0x1c/0x24) The problem is that musb_free() tries to figure out what was initializued and what wasn't and clean up only the initialized part. This works well for usb_del_gadget_udc() but device_unregister() can't deal with it. Therefore we rely on the fact the we always have a parent device and only then remove the device. I broke this in 0f91349 ("usb: gadget: convert all users to the new udc infrastructure") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Felipe Balbi --- drivers/usb/musb/musb_gadget.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 8c41a2e..e818203 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1856,6 +1856,7 @@ int __init musb_gadget_setup(struct musb *musb) return 0; err: + musb->g.dev.parent = NULL; device_unregister(&musb->g.dev); return status; } @@ -1863,7 +1864,8 @@ err: void musb_gadget_cleanup(struct musb *musb) { usb_del_gadget_udc(&musb->g); - device_unregister(&musb->g.dev); + if (musb->g.dev.parent) + device_unregister(&musb->g.dev); } /* -- cgit v1.1 From 0ac8e58f3818795d02ac309bd57b4d93ec283a77 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 9 Aug 2011 12:24:17 +0100 Subject: ARM: fix perf build with uclibc toolchains libio.h is not provided by uClibc, in order to be able to test the definition of __UCLIBC__ we need to include stdlib.h, which also includes stddef.h, providing the definition of 'NULL'. Signed-off-by: Florian Fainelli Signed-off-by: Will Deacon --- tools/perf/arch/arm/util/dwarf-regs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/arch/arm/util/dwarf-regs.c b/tools/perf/arch/arm/util/dwarf-regs.c index fff6450..e8d5c55 100644 --- a/tools/perf/arch/arm/util/dwarf-regs.c +++ b/tools/perf/arch/arm/util/dwarf-regs.c @@ -8,7 +8,10 @@ * published by the Free Software Foundation. */ +#include +#ifndef __UCLIBC__ #include +#endif #include struct pt_regs_dwarfnum { -- cgit v1.1 From 49bef8331afefa4dd75f7124c50bde47168f5492 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 10 Aug 2011 10:20:17 +0100 Subject: ARM: perf: fix prototype of release_pmu Commit f12482c9 ("ARM: 6974/1: pmu: refactor reservation") changed the prototype of release_pmu, but missed the stub for when CONFIG_CPU_HAS_PMU is not selected by the platform. This patch changes the prototype of the stub, preventing possible build failures when CONFIG_CPU_HAS_PMU is not selected. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/include/asm/pmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index 67c70a3..8ae32ba 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -75,7 +75,7 @@ reserve_pmu(enum arm_pmu_type device) } static inline int -release_pmu(struct platform_device *pdev) +release_pmu(enum arm_pmu_type device) { return -ENODEV; } -- cgit v1.1 From 7fdd3c49629e8aab48dbd1b2f800854b0f93cba0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 12 Aug 2011 10:42:48 +0100 Subject: ARM: perf: make name of arm_pmu_type consistent Commit f12482c9 ("ARM: 6974/1: pmu: refactor reservation") changed {release,reserve}_pmu to take an enum arm_pmu_type as a parameter, but inconsistently named the parameter `type' or `device'. It would be nice if these were consistent. This patch makes use of enum arm_pmu_type consistent, always using `type'. Related printks are updated, explicitly mentioning `type' also. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/include/asm/pmu.h | 10 +++++----- arch/arm/kernel/pmu.c | 26 +++++++++++++------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index 8ae32ba..b7e82c4 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -41,7 +41,7 @@ struct arm_pmu_platdata { * encoded error on failure. */ extern struct platform_device * -reserve_pmu(enum arm_pmu_type device); +reserve_pmu(enum arm_pmu_type type); /** * release_pmu() - Relinquish control of the performance counters @@ -62,26 +62,26 @@ release_pmu(enum arm_pmu_type type); * the actual hardware initialisation. */ extern int -init_pmu(enum arm_pmu_type device); +init_pmu(enum arm_pmu_type type); #else /* CONFIG_CPU_HAS_PMU */ #include static inline struct platform_device * -reserve_pmu(enum arm_pmu_type device) +reserve_pmu(enum arm_pmu_type type) { return ERR_PTR(-ENODEV); } static inline int -release_pmu(enum arm_pmu_type device) +release_pmu(enum arm_pmu_type type) { return -ENODEV; } static inline int -init_pmu(enum arm_pmu_type device) +init_pmu(enum arm_pmu_type type) { return -ENODEV; } diff --git a/arch/arm/kernel/pmu.c b/arch/arm/kernel/pmu.c index 2b70709..c53474f 100644 --- a/arch/arm/kernel/pmu.c +++ b/arch/arm/kernel/pmu.c @@ -31,7 +31,7 @@ static int __devinit pmu_register(struct platform_device *pdev, { if (type < 0 || type >= ARM_NUM_PMU_DEVICES) { pr_warning("received registration request for unknown " - "device %d\n", type); + "PMU device type %d\n", type); return -EINVAL; } @@ -112,17 +112,17 @@ static int __init register_pmu_driver(void) device_initcall(register_pmu_driver); struct platform_device * -reserve_pmu(enum arm_pmu_type device) +reserve_pmu(enum arm_pmu_type type) { struct platform_device *pdev; - if (test_and_set_bit_lock(device, &pmu_lock)) { + if (test_and_set_bit_lock(type, &pmu_lock)) { pdev = ERR_PTR(-EBUSY); - } else if (pmu_devices[device] == NULL) { - clear_bit_unlock(device, &pmu_lock); + } else if (pmu_devices[type] == NULL) { + clear_bit_unlock(type, &pmu_lock); pdev = ERR_PTR(-ENODEV); } else { - pdev = pmu_devices[device]; + pdev = pmu_devices[type]; } return pdev; @@ -130,11 +130,11 @@ reserve_pmu(enum arm_pmu_type device) EXPORT_SYMBOL_GPL(reserve_pmu); int -release_pmu(enum arm_pmu_type device) +release_pmu(enum arm_pmu_type type) { - if (WARN_ON(!pmu_devices[device])) + if (WARN_ON(!pmu_devices[type])) return -EINVAL; - clear_bit_unlock(device, &pmu_lock); + clear_bit_unlock(type, &pmu_lock); return 0; } EXPORT_SYMBOL_GPL(release_pmu); @@ -182,17 +182,17 @@ init_cpu_pmu(void) } int -init_pmu(enum arm_pmu_type device) +init_pmu(enum arm_pmu_type type) { int err = 0; - switch (device) { + switch (type) { case ARM_PMU_DEVICE_CPU: err = init_cpu_pmu(); break; default: - pr_warning("attempt to initialise unknown device %d\n", - device); + pr_warning("attempt to initialise PMU of unknown " + "type %d\n", type); err = -EINVAL; } -- cgit v1.1 From 5cb843ca0f781b62dc9793b26926d0b8efef5576 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 20 Jul 2011 11:57:03 +0100 Subject: ARM: realview: ensure visibility of writes during reset The various reset routines in mach-realview rely on an FPGA to power-cycle the board after writing some magic runes to memory-mapped registers. This patch adds a dsb() following the writes, so that they become visible before we mdelay(1000) in the arch_reset code. Without this patch, the timeout would expire sporadically, causing the reset to fail. Signed-off-by: Will Deacon --- arch/arm/mach-realview/include/mach/system.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-realview/include/mach/system.h b/arch/arm/mach-realview/include/mach/system.h index a30f2e3..6657ff23 100644 --- a/arch/arm/mach-realview/include/mach/system.h +++ b/arch/arm/mach-realview/include/mach/system.h @@ -44,6 +44,7 @@ static inline void arch_reset(char mode, const char *cmd) */ if (realview_reset) realview_reset(mode); + dsb(); } #endif -- cgit v1.1 From dfc40b24c0a37593724f3317cd485c73ee878c18 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 20 Jul 2011 14:18:46 +0100 Subject: ARM: twd: register clockevents device before enabling PPI The smp_twd clockevents driver currently enables the local timer PPI before the clockevents device is registered. This can lead to a kernel panic if a spurious timer interrupt is generated before registration has completed since the kernel will treat it as an IPI timer. This patch moves the clockevents device registration before the IRQ unmasking so that we can always handle timer interrupts once they can occur. Acked-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/kernel/smp_twd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 2c277d4..01c1862 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -137,8 +137,8 @@ void __cpuinit twd_timer_setup(struct clock_event_device *clk) clk->max_delta_ns = clockevent_delta2ns(0xffffffff, clk); clk->min_delta_ns = clockevent_delta2ns(0xf, clk); + clockevents_register_device(clk); + /* Make sure our local interrupt controller has this enabled */ gic_enable_ppi(clk->irq); - - clockevents_register_device(clk); } -- cgit v1.1 From 72dc53acd50db066a5a5ebe1f39fae73d7e62aa8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 3 Aug 2011 12:37:04 +0100 Subject: ARM: cache: detect VIPT aliasing I-cache on ARMv6 The current cache detection code does not check for an aliasing I-cache if the D-cache is found to be VIPT aliasing. This patch fixes the problem by always checking for an aliasing I-cache on v6 and later. Signed-off-by: Will Deacon --- arch/arm/kernel/setup.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 70bca64..e514c76 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -280,18 +280,19 @@ static void __init cacheid_init(void) if (arch >= CPU_ARCH_ARMv6) { if ((cachetype & (7 << 29)) == 4 << 29) { /* ARMv7 register format */ + arch = CPU_ARCH_ARMv7; cacheid = CACHEID_VIPT_NONALIASING; if ((cachetype & (3 << 14)) == 1 << 14) cacheid |= CACHEID_ASID_TAGGED; - else if (cpu_has_aliasing_icache(CPU_ARCH_ARMv7)) - cacheid |= CACHEID_VIPT_I_ALIASING; - } else if (cachetype & (1 << 23)) { - cacheid = CACHEID_VIPT_ALIASING; } else { - cacheid = CACHEID_VIPT_NONALIASING; - if (cpu_has_aliasing_icache(CPU_ARCH_ARMv6)) - cacheid |= CACHEID_VIPT_I_ALIASING; + arch = CPU_ARCH_ARMv6; + if (cachetype & (1 << 23)) + cacheid = CACHEID_VIPT_ALIASING; + else + cacheid = CACHEID_VIPT_NONALIASING; } + if (cpu_has_aliasing_icache(arch)) + cacheid |= CACHEID_VIPT_I_ALIASING; } else { cacheid = CACHEID_VIVT; } -- cgit v1.1 From f8afdf481f0fef5e170c6c928cec42879d505654 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 12 Aug 2011 13:31:30 -0400 Subject: drivers/net/wireless/wl12xx: add missing kfree In each case, the freed data should be freed in the error handling code as well. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @exists@ local idexpression x; statement S,S1; expression E; identifier fl; expression *ptr != NULL; @@ x = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S <... when != x when != if (...) { <+...kfree(x)...+> } when any when != true x == NULL x->fl ...> ( if (x == NULL) S1 | if (...) { ... when != x when forall ( return \(0\|<+...x...+>\|ptr\); | * return ...; ) } ) // Signed-off-by: Julia Lawall Acked-by: Luciano Coelho Signed-off-by: John W. Linville --- drivers/net/wireless/wl12xx/acx.c | 6 +----- drivers/net/wireless/wl12xx/testmode.c | 5 ++++- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/wl12xx/acx.c b/drivers/net/wireless/wl12xx/acx.c index 7e33f1f..34f6ab5 100644 --- a/drivers/net/wireless/wl12xx/acx.c +++ b/drivers/net/wireless/wl12xx/acx.c @@ -77,8 +77,6 @@ int wl1271_acx_sleep_auth(struct wl1271 *wl, u8 sleep_auth) auth->sleep_auth = sleep_auth; ret = wl1271_cmd_configure(wl, ACX_SLEEP_AUTH, auth, sizeof(*auth)); - if (ret < 0) - return ret; out: kfree(auth); @@ -624,10 +622,8 @@ int wl1271_acx_cca_threshold(struct wl1271 *wl) ret = wl1271_cmd_configure(wl, ACX_CCA_THRESHOLD, detection, sizeof(*detection)); - if (ret < 0) { + if (ret < 0) wl1271_warning("failed to set cca threshold: %d", ret); - return ret; - } out: kfree(detection); diff --git a/drivers/net/wireless/wl12xx/testmode.c b/drivers/net/wireless/wl12xx/testmode.c index 5d5e1ef..88add68 100644 --- a/drivers/net/wireless/wl12xx/testmode.c +++ b/drivers/net/wireless/wl12xx/testmode.c @@ -139,12 +139,15 @@ static int wl1271_tm_cmd_interrogate(struct wl1271 *wl, struct nlattr *tb[]) if (ret < 0) { wl1271_warning("testmode cmd interrogate failed: %d", ret); + kfree(cmd); return ret; } skb = cfg80211_testmode_alloc_reply_skb(wl->hw->wiphy, sizeof(*cmd)); - if (!skb) + if (!skb) { + kfree(cmd); return -ENOMEM; + } NLA_PUT(skb, WL1271_TM_ATTR_DATA, sizeof(*cmd), cmd); -- cgit v1.1 From 06f8e2d6754dc631732415b741b5aa58a0f7133f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 12 Aug 2011 13:57:55 -0500 Subject: xfs: don't expect xfs headers to be in subdirectories Fix up some #include directives in preparation for moving a few header files out of xfs source subdirectories. Note that "xfs_linux.h" also got its quoting convention for included files switched. Signed-off-by: Alex Elder Reviewed-by: Christoph Hellwig --- fs/xfs/Makefile | 2 +- fs/xfs/linux-2.6/xfs_linux.h | 27 +++++++++++++-------------- fs/xfs/linux-2.6/xfs_quotaops.c | 2 +- fs/xfs/linux-2.6/xfs_trace.c | 4 ++-- fs/xfs/xfs.h | 3 ++- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 75bb316..b100cf4 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -16,7 +16,7 @@ # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # -ccflags-y := -I$(src) -I$(src)/linux-2.6 +ccflags-y := -I$(src) -I$(src)/linux-2.6 -I$(src)/quota -I$(src)/support ccflags-$(CONFIG_XFS_DEBUG) += -g XFS_LINUX := linux-2.6 diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index d42f814..1e8a45e 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -32,13 +32,12 @@ # define XFS_BIG_INUMS 0 #endif -#include +#include "xfs_types.h" -#include -#include -#include - -#include +#include "kmem.h" +#include "mrlock.h" +#include "time.h" +#include "uuid.h" #include #include @@ -78,14 +77,14 @@ #include #include -#include -#include -#include -#include -#include -#include -#include -#include +#include "xfs_vnode.h" +#include "xfs_stats.h" +#include "xfs_sysctl.h" +#include "xfs_iops.h" +#include "xfs_aops.h" +#include "xfs_super.h" +#include "xfs_buf.h" +#include "xfs_message.h" #ifdef __BIG_ENDIAN #define XFS_NATIVE_HOST 1 diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index 29b9d64..7e76f53 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c @@ -25,7 +25,7 @@ #include "xfs_trans.h" #include "xfs_bmap_btree.h" #include "xfs_inode.h" -#include "quota/xfs_qm.h" +#include "xfs_qm.h" #include diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c index 88d25d4..9010ce8 100644 --- a/fs/xfs/linux-2.6/xfs_trace.c +++ b/fs/xfs/linux-2.6/xfs_trace.c @@ -43,8 +43,8 @@ #include "xfs_quota.h" #include "xfs_iomap.h" #include "xfs_aops.h" -#include "quota/xfs_dquot_item.h" -#include "quota/xfs_dquot.h" +#include "xfs_dquot_item.h" +#include "xfs_dquot.h" #include "xfs_log_recover.h" #include "xfs_inode_item.h" diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h index 53ec3ea..d8b11b7 100644 --- a/fs/xfs/xfs.h +++ b/fs/xfs/xfs.h @@ -24,5 +24,6 @@ #define XFS_BUF_LOCK_TRACKING 1 #endif -#include +#include "xfs_linux.h" + #endif /* __XFS_H__ */ -- cgit v1.1 From c59d87c460767bc35dafd490139d3cfe78fb8da4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 Aug 2011 16:21:35 -0500 Subject: xfs: remove subdirectories Use the move from Linux 2.6 to Linux 3.x as an excuse to kill the annoying subdirectories in the XFS source code. Besides the large amount of file rename the only changes are to the Makefile, a few files including headers with the subdirectory prefix, and the binary sysctl compat code that includes a header under fs/xfs/ from kernel/. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/Makefile | 117 +- fs/xfs/kmem.c | 132 +++ fs/xfs/kmem.h | 124 ++ fs/xfs/linux-2.6/kmem.c | 132 --- fs/xfs/linux-2.6/kmem.h | 124 -- fs/xfs/linux-2.6/mrlock.h | 90 -- fs/xfs/linux-2.6/time.h | 36 - fs/xfs/linux-2.6/xfs_acl.c | 420 ------- fs/xfs/linux-2.6/xfs_aops.c | 1499 ------------------------ fs/xfs/linux-2.6/xfs_aops.h | 68 -- fs/xfs/linux-2.6/xfs_buf.c | 1876 ------------------------------ fs/xfs/linux-2.6/xfs_buf.h | 326 ------ fs/xfs/linux-2.6/xfs_discard.c | 222 ---- fs/xfs/linux-2.6/xfs_discard.h | 10 - fs/xfs/linux-2.6/xfs_export.c | 250 ---- fs/xfs/linux-2.6/xfs_export.h | 72 -- fs/xfs/linux-2.6/xfs_file.c | 1096 ------------------ fs/xfs/linux-2.6/xfs_fs_subr.c | 96 -- fs/xfs/linux-2.6/xfs_globals.c | 43 - fs/xfs/linux-2.6/xfs_ioctl.c | 1556 ------------------------- fs/xfs/linux-2.6/xfs_ioctl.h | 85 -- fs/xfs/linux-2.6/xfs_ioctl32.c | 672 ----------- fs/xfs/linux-2.6/xfs_ioctl32.h | 237 ---- fs/xfs/linux-2.6/xfs_iops.c | 1210 -------------------- fs/xfs/linux-2.6/xfs_iops.h | 30 - fs/xfs/linux-2.6/xfs_linux.h | 309 ----- fs/xfs/linux-2.6/xfs_message.c | 108 -- fs/xfs/linux-2.6/xfs_message.h | 39 - fs/xfs/linux-2.6/xfs_quotaops.c | 139 --- fs/xfs/linux-2.6/xfs_stats.c | 122 -- fs/xfs/linux-2.6/xfs_stats.h | 223 ---- fs/xfs/linux-2.6/xfs_super.c | 1773 ---------------------------- fs/xfs/linux-2.6/xfs_super.h | 87 -- fs/xfs/linux-2.6/xfs_sync.c | 1065 ----------------- fs/xfs/linux-2.6/xfs_sync.h | 51 - fs/xfs/linux-2.6/xfs_sysctl.c | 252 ---- fs/xfs/linux-2.6/xfs_sysctl.h | 102 -- fs/xfs/linux-2.6/xfs_trace.c | 56 - fs/xfs/linux-2.6/xfs_trace.h | 1746 ---------------------------- fs/xfs/linux-2.6/xfs_vnode.h | 64 -- fs/xfs/linux-2.6/xfs_xattr.c | 241 ---- fs/xfs/mrlock.h | 90 ++ fs/xfs/quota/xfs_dquot.c | 1454 ----------------------- fs/xfs/quota/xfs_dquot.h | 137 --- fs/xfs/quota/xfs_dquot_item.c | 529 --------- fs/xfs/quota/xfs_dquot_item.h | 48 - fs/xfs/quota/xfs_qm.c | 2416 --------------------------------------- fs/xfs/quota/xfs_qm.h | 166 --- fs/xfs/quota/xfs_qm_bhv.c | 176 --- fs/xfs/quota/xfs_qm_stats.c | 105 -- fs/xfs/quota/xfs_qm_stats.h | 53 - fs/xfs/quota/xfs_qm_syscalls.c | 906 --------------- fs/xfs/quota/xfs_quota_priv.h | 53 - fs/xfs/quota/xfs_trans_dquot.c | 890 -------------- fs/xfs/support/uuid.c | 63 - fs/xfs/support/uuid.h | 29 - fs/xfs/time.h | 36 + fs/xfs/uuid.c | 63 + fs/xfs/uuid.h | 29 + fs/xfs/xfs_acl.c | 420 +++++++ fs/xfs/xfs_aops.c | 1499 ++++++++++++++++++++++++ fs/xfs/xfs_aops.h | 68 ++ fs/xfs/xfs_buf.c | 1876 ++++++++++++++++++++++++++++++ fs/xfs/xfs_buf.h | 326 ++++++ fs/xfs/xfs_discard.c | 222 ++++ fs/xfs/xfs_discard.h | 10 + fs/xfs/xfs_dquot.c | 1454 +++++++++++++++++++++++ fs/xfs/xfs_dquot.h | 137 +++ fs/xfs/xfs_dquot_item.c | 529 +++++++++ fs/xfs/xfs_dquot_item.h | 48 + fs/xfs/xfs_export.c | 250 ++++ fs/xfs/xfs_export.h | 72 ++ fs/xfs/xfs_file.c | 1096 ++++++++++++++++++ fs/xfs/xfs_fs_subr.c | 96 ++ fs/xfs/xfs_globals.c | 43 + fs/xfs/xfs_ioctl.c | 1556 +++++++++++++++++++++++++ fs/xfs/xfs_ioctl.h | 85 ++ fs/xfs/xfs_ioctl32.c | 672 +++++++++++ fs/xfs/xfs_ioctl32.h | 237 ++++ fs/xfs/xfs_iops.c | 1210 ++++++++++++++++++++ fs/xfs/xfs_iops.h | 30 + fs/xfs/xfs_linux.h | 309 +++++ fs/xfs/xfs_message.c | 108 ++ fs/xfs/xfs_message.h | 39 + fs/xfs/xfs_qm.c | 2416 +++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_qm.h | 166 +++ fs/xfs/xfs_qm_bhv.c | 176 +++ fs/xfs/xfs_qm_stats.c | 105 ++ fs/xfs/xfs_qm_stats.h | 53 + fs/xfs/xfs_qm_syscalls.c | 906 +++++++++++++++ fs/xfs/xfs_quota_priv.h | 53 + fs/xfs/xfs_quotaops.c | 139 +++ fs/xfs/xfs_stats.c | 122 ++ fs/xfs/xfs_stats.h | 223 ++++ fs/xfs/xfs_super.c | 1773 ++++++++++++++++++++++++++++ fs/xfs/xfs_super.h | 87 ++ fs/xfs/xfs_sync.c | 1065 +++++++++++++++++ fs/xfs/xfs_sync.h | 51 + fs/xfs/xfs_sysctl.c | 252 ++++ fs/xfs/xfs_sysctl.h | 102 ++ fs/xfs/xfs_trace.c | 56 + fs/xfs/xfs_trace.h | 1746 ++++++++++++++++++++++++++++ fs/xfs/xfs_trans_dquot.c | 890 ++++++++++++++ fs/xfs/xfs_vnode.h | 64 ++ fs/xfs/xfs_xattr.c | 241 ++++ kernel/sysctl_binary.c | 2 +- kernel/sysctl_check.c | 2 +- 107 files changed, 23610 insertions(+), 23615 deletions(-) create mode 100644 fs/xfs/kmem.c create mode 100644 fs/xfs/kmem.h delete mode 100644 fs/xfs/linux-2.6/kmem.c delete mode 100644 fs/xfs/linux-2.6/kmem.h delete mode 100644 fs/xfs/linux-2.6/mrlock.h delete mode 100644 fs/xfs/linux-2.6/time.h delete mode 100644 fs/xfs/linux-2.6/xfs_acl.c delete mode 100644 fs/xfs/linux-2.6/xfs_aops.c delete mode 100644 fs/xfs/linux-2.6/xfs_aops.h delete mode 100644 fs/xfs/linux-2.6/xfs_buf.c delete mode 100644 fs/xfs/linux-2.6/xfs_buf.h delete mode 100644 fs/xfs/linux-2.6/xfs_discard.c delete mode 100644 fs/xfs/linux-2.6/xfs_discard.h delete mode 100644 fs/xfs/linux-2.6/xfs_export.c delete mode 100644 fs/xfs/linux-2.6/xfs_export.h delete mode 100644 fs/xfs/linux-2.6/xfs_file.c delete mode 100644 fs/xfs/linux-2.6/xfs_fs_subr.c delete mode 100644 fs/xfs/linux-2.6/xfs_globals.c delete mode 100644 fs/xfs/linux-2.6/xfs_ioctl.c delete mode 100644 fs/xfs/linux-2.6/xfs_ioctl.h delete mode 100644 fs/xfs/linux-2.6/xfs_ioctl32.c delete mode 100644 fs/xfs/linux-2.6/xfs_ioctl32.h delete mode 100644 fs/xfs/linux-2.6/xfs_iops.c delete mode 100644 fs/xfs/linux-2.6/xfs_iops.h delete mode 100644 fs/xfs/linux-2.6/xfs_linux.h delete mode 100644 fs/xfs/linux-2.6/xfs_message.c delete mode 100644 fs/xfs/linux-2.6/xfs_message.h delete mode 100644 fs/xfs/linux-2.6/xfs_quotaops.c delete mode 100644 fs/xfs/linux-2.6/xfs_stats.c delete mode 100644 fs/xfs/linux-2.6/xfs_stats.h delete mode 100644 fs/xfs/linux-2.6/xfs_super.c delete mode 100644 fs/xfs/linux-2.6/xfs_super.h delete mode 100644 fs/xfs/linux-2.6/xfs_sync.c delete mode 100644 fs/xfs/linux-2.6/xfs_sync.h delete mode 100644 fs/xfs/linux-2.6/xfs_sysctl.c delete mode 100644 fs/xfs/linux-2.6/xfs_sysctl.h delete mode 100644 fs/xfs/linux-2.6/xfs_trace.c delete mode 100644 fs/xfs/linux-2.6/xfs_trace.h delete mode 100644 fs/xfs/linux-2.6/xfs_vnode.h delete mode 100644 fs/xfs/linux-2.6/xfs_xattr.c create mode 100644 fs/xfs/mrlock.h delete mode 100644 fs/xfs/quota/xfs_dquot.c delete mode 100644 fs/xfs/quota/xfs_dquot.h delete mode 100644 fs/xfs/quota/xfs_dquot_item.c delete mode 100644 fs/xfs/quota/xfs_dquot_item.h delete mode 100644 fs/xfs/quota/xfs_qm.c delete mode 100644 fs/xfs/quota/xfs_qm.h delete mode 100644 fs/xfs/quota/xfs_qm_bhv.c delete mode 100644 fs/xfs/quota/xfs_qm_stats.c delete mode 100644 fs/xfs/quota/xfs_qm_stats.h delete mode 100644 fs/xfs/quota/xfs_qm_syscalls.c delete mode 100644 fs/xfs/quota/xfs_quota_priv.h delete mode 100644 fs/xfs/quota/xfs_trans_dquot.c delete mode 100644 fs/xfs/support/uuid.c delete mode 100644 fs/xfs/support/uuid.h create mode 100644 fs/xfs/time.h create mode 100644 fs/xfs/uuid.c create mode 100644 fs/xfs/uuid.h create mode 100644 fs/xfs/xfs_acl.c create mode 100644 fs/xfs/xfs_aops.c create mode 100644 fs/xfs/xfs_aops.h create mode 100644 fs/xfs/xfs_buf.c create mode 100644 fs/xfs/xfs_buf.h create mode 100644 fs/xfs/xfs_discard.c create mode 100644 fs/xfs/xfs_discard.h create mode 100644 fs/xfs/xfs_dquot.c create mode 100644 fs/xfs/xfs_dquot.h create mode 100644 fs/xfs/xfs_dquot_item.c create mode 100644 fs/xfs/xfs_dquot_item.h create mode 100644 fs/xfs/xfs_export.c create mode 100644 fs/xfs/xfs_export.h create mode 100644 fs/xfs/xfs_file.c create mode 100644 fs/xfs/xfs_fs_subr.c create mode 100644 fs/xfs/xfs_globals.c create mode 100644 fs/xfs/xfs_ioctl.c create mode 100644 fs/xfs/xfs_ioctl.h create mode 100644 fs/xfs/xfs_ioctl32.c create mode 100644 fs/xfs/xfs_ioctl32.h create mode 100644 fs/xfs/xfs_iops.c create mode 100644 fs/xfs/xfs_iops.h create mode 100644 fs/xfs/xfs_linux.h create mode 100644 fs/xfs/xfs_message.c create mode 100644 fs/xfs/xfs_message.h create mode 100644 fs/xfs/xfs_qm.c create mode 100644 fs/xfs/xfs_qm.h create mode 100644 fs/xfs/xfs_qm_bhv.c create mode 100644 fs/xfs/xfs_qm_stats.c create mode 100644 fs/xfs/xfs_qm_stats.h create mode 100644 fs/xfs/xfs_qm_syscalls.c create mode 100644 fs/xfs/xfs_quota_priv.h create mode 100644 fs/xfs/xfs_quotaops.c create mode 100644 fs/xfs/xfs_stats.c create mode 100644 fs/xfs/xfs_stats.h create mode 100644 fs/xfs/xfs_super.c create mode 100644 fs/xfs/xfs_super.h create mode 100644 fs/xfs/xfs_sync.c create mode 100644 fs/xfs/xfs_sync.h create mode 100644 fs/xfs/xfs_sysctl.c create mode 100644 fs/xfs/xfs_sysctl.h create mode 100644 fs/xfs/xfs_trace.c create mode 100644 fs/xfs/xfs_trace.h create mode 100644 fs/xfs/xfs_trans_dquot.c create mode 100644 fs/xfs/xfs_vnode.h create mode 100644 fs/xfs/xfs_xattr.c diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index b100cf4..ffce328 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -16,44 +16,51 @@ # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # -ccflags-y := -I$(src) -I$(src)/linux-2.6 -I$(src)/quota -I$(src)/support ccflags-$(CONFIG_XFS_DEBUG) += -g -XFS_LINUX := linux-2.6 - obj-$(CONFIG_XFS_FS) += xfs.o -xfs-y += linux-2.6/xfs_trace.o - -xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \ - xfs_dquot.o \ - xfs_dquot_item.o \ - xfs_trans_dquot.o \ - xfs_qm_syscalls.o \ - xfs_qm_bhv.o \ - xfs_qm.o) -xfs-$(CONFIG_XFS_QUOTA) += linux-2.6/xfs_quotaops.o - -ifeq ($(CONFIG_XFS_QUOTA),y) -xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o -endif - -xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o -xfs-$(CONFIG_XFS_POSIX_ACL) += $(XFS_LINUX)/xfs_acl.o -xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o -xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o -xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o +# this one should be compiled first, as the tracing macros can easily blow up +xfs-y += xfs_trace.o +# highlevel code +xfs-y += xfs_aops.o \ + xfs_bit.o \ + xfs_buf.o \ + xfs_dfrag.o \ + xfs_discard.o \ + xfs_error.o \ + xfs_export.o \ + xfs_file.o \ + xfs_filestream.o \ + xfs_fsops.o \ + xfs_fs_subr.o \ + xfs_globals.o \ + xfs_iget.o \ + xfs_ioctl.o \ + xfs_iomap.o \ + xfs_iops.o \ + xfs_itable.o \ + xfs_message.o \ + xfs_mru_cache.o \ + xfs_super.o \ + xfs_sync.o \ + xfs_xattr.o \ + xfs_rename.o \ + xfs_rw.o \ + xfs_utils.o \ + xfs_vnodeops.o \ + kmem.o \ + uuid.o +# code shared with libxfs xfs-y += xfs_alloc.o \ xfs_alloc_btree.o \ xfs_attr.o \ xfs_attr_leaf.o \ - xfs_bit.o \ xfs_bmap.o \ xfs_bmap_btree.o \ xfs_btree.o \ - xfs_buf_item.o \ xfs_da_btree.o \ xfs_dir2.o \ xfs_dir2_block.o \ @@ -61,49 +68,37 @@ xfs-y += xfs_alloc.o \ xfs_dir2_leaf.o \ xfs_dir2_node.o \ xfs_dir2_sf.o \ - xfs_error.o \ - xfs_extfree_item.o \ - xfs_filestream.o \ - xfs_fsops.o \ xfs_ialloc.o \ xfs_ialloc_btree.o \ - xfs_iget.o \ xfs_inode.o \ - xfs_inode_item.o \ - xfs_iomap.o \ - xfs_itable.o \ - xfs_dfrag.o \ - xfs_log.o \ - xfs_log_cil.o \ xfs_log_recover.o \ xfs_mount.o \ - xfs_mru_cache.o \ - xfs_rename.o \ - xfs_trans.o \ + xfs_trans.o + +# low-level transaction/log code +xfs-y += xfs_log.o \ + xfs_log_cil.o \ + xfs_buf_item.o \ + xfs_extfree_item.o \ + xfs_inode_item.o \ xfs_trans_ail.o \ xfs_trans_buf.o \ xfs_trans_extfree.o \ xfs_trans_inode.o \ - xfs_utils.o \ - xfs_vnodeops.o \ - xfs_rw.o - -# Objects in linux/ -xfs-y += $(addprefix $(XFS_LINUX)/, \ - kmem.o \ - xfs_aops.o \ - xfs_buf.o \ - xfs_discard.o \ - xfs_export.o \ - xfs_file.o \ - xfs_fs_subr.o \ - xfs_globals.o \ - xfs_ioctl.o \ - xfs_iops.o \ - xfs_message.o \ - xfs_super.o \ - xfs_sync.o \ - xfs_xattr.o) -# Objects in support/ -xfs-y += support/uuid.o +# optional features +xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ + xfs_dquot_item.o \ + xfs_trans_dquot.o \ + xfs_qm_syscalls.o \ + xfs_qm_bhv.o \ + xfs_qm.o \ + xfs_quotaops.o +ifeq ($(CONFIG_XFS_QUOTA),y) +xfs-$(CONFIG_PROC_FS) += xfs_qm_stats.o +endif +xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o +xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o +xfs-$(CONFIG_PROC_FS) += xfs_stats.o +xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o +xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c new file mode 100644 index 0000000..a907de5 --- /dev/null +++ b/fs/xfs/kmem.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include +#include +#include +#include +#include +#include +#include "time.h" +#include "kmem.h" +#include "xfs_message.h" + +/* + * Greedy allocation. May fail and may return vmalloced memory. + * + * Must be freed using kmem_free_large. + */ +void * +kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) +{ + void *ptr; + size_t kmsize = maxsize; + + while (!(ptr = kmem_zalloc_large(kmsize))) { + if ((kmsize >>= 1) <= minsize) + kmsize = minsize; + } + if (ptr) + *size = kmsize; + return ptr; +} + +void * +kmem_alloc(size_t size, unsigned int __nocast flags) +{ + int retries = 0; + gfp_t lflags = kmem_flags_convert(flags); + void *ptr; + + do { + ptr = kmalloc(size, lflags); + if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) + return ptr; + if (!(++retries % 100)) + xfs_err(NULL, + "possible memory allocation deadlock in %s (mode:0x%x)", + __func__, lflags); + congestion_wait(BLK_RW_ASYNC, HZ/50); + } while (1); +} + +void * +kmem_zalloc(size_t size, unsigned int __nocast flags) +{ + void *ptr; + + ptr = kmem_alloc(size, flags); + if (ptr) + memset((char *)ptr, 0, (int)size); + return ptr; +} + +void +kmem_free(const void *ptr) +{ + if (!is_vmalloc_addr(ptr)) { + kfree(ptr); + } else { + vfree(ptr); + } +} + +void * +kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, + unsigned int __nocast flags) +{ + void *new; + + new = kmem_alloc(newsize, flags); + if (ptr) { + if (new) + memcpy(new, ptr, + ((oldsize < newsize) ? oldsize : newsize)); + kmem_free(ptr); + } + return new; +} + +void * +kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) +{ + int retries = 0; + gfp_t lflags = kmem_flags_convert(flags); + void *ptr; + + do { + ptr = kmem_cache_alloc(zone, lflags); + if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) + return ptr; + if (!(++retries % 100)) + xfs_err(NULL, + "possible memory allocation deadlock in %s (mode:0x%x)", + __func__, lflags); + congestion_wait(BLK_RW_ASYNC, HZ/50); + } while (1); +} + +void * +kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags) +{ + void *ptr; + + ptr = kmem_zone_alloc(zone, flags); + if (ptr) + memset((char *)ptr, 0, kmem_cache_size(zone)); + return ptr; +} diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h new file mode 100644 index 0000000..f7c8f7a --- /dev/null +++ b/fs/xfs/kmem.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_SUPPORT_KMEM_H__ +#define __XFS_SUPPORT_KMEM_H__ + +#include +#include +#include +#include + +/* + * General memory allocation interfaces + */ + +#define KM_SLEEP 0x0001u +#define KM_NOSLEEP 0x0002u +#define KM_NOFS 0x0004u +#define KM_MAYFAIL 0x0008u + +/* + * We use a special process flag to avoid recursive callbacks into + * the filesystem during transactions. We will also issue our own + * warnings, so we explicitly skip any generic ones (silly of us). + */ +static inline gfp_t +kmem_flags_convert(unsigned int __nocast flags) +{ + gfp_t lflags; + + BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL)); + + if (flags & KM_NOSLEEP) { + lflags = GFP_ATOMIC | __GFP_NOWARN; + } else { + lflags = GFP_KERNEL | __GFP_NOWARN; + if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) + lflags &= ~__GFP_FS; + } + return lflags; +} + +extern void *kmem_alloc(size_t, unsigned int __nocast); +extern void *kmem_zalloc(size_t, unsigned int __nocast); +extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast); +extern void kmem_free(const void *); + +static inline void *kmem_zalloc_large(size_t size) +{ + void *ptr; + + ptr = vmalloc(size); + if (ptr) + memset(ptr, 0, size); + return ptr; +} +static inline void kmem_free_large(void *ptr) +{ + vfree(ptr); +} + +extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); + +/* + * Zone interfaces + */ + +#define KM_ZONE_HWALIGN SLAB_HWCACHE_ALIGN +#define KM_ZONE_RECLAIM SLAB_RECLAIM_ACCOUNT +#define KM_ZONE_SPREAD SLAB_MEM_SPREAD + +#define kmem_zone kmem_cache +#define kmem_zone_t struct kmem_cache + +static inline kmem_zone_t * +kmem_zone_init(int size, char *zone_name) +{ + return kmem_cache_create(zone_name, size, 0, 0, NULL); +} + +static inline kmem_zone_t * +kmem_zone_init_flags(int size, char *zone_name, unsigned long flags, + void (*construct)(void *)) +{ + return kmem_cache_create(zone_name, size, 0, flags, construct); +} + +static inline void +kmem_zone_free(kmem_zone_t *zone, void *ptr) +{ + kmem_cache_free(zone, ptr); +} + +static inline void +kmem_zone_destroy(kmem_zone_t *zone) +{ + if (zone) + kmem_cache_destroy(zone); +} + +extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); +extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); + +static inline int +kmem_shake_allow(gfp_t gfp_mask) +{ + return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)); +} + +#endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c deleted file mode 100644 index a907de5..0000000 --- a/fs/xfs/linux-2.6/kmem.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include -#include -#include -#include -#include -#include -#include "time.h" -#include "kmem.h" -#include "xfs_message.h" - -/* - * Greedy allocation. May fail and may return vmalloced memory. - * - * Must be freed using kmem_free_large. - */ -void * -kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) -{ - void *ptr; - size_t kmsize = maxsize; - - while (!(ptr = kmem_zalloc_large(kmsize))) { - if ((kmsize >>= 1) <= minsize) - kmsize = minsize; - } - if (ptr) - *size = kmsize; - return ptr; -} - -void * -kmem_alloc(size_t size, unsigned int __nocast flags) -{ - int retries = 0; - gfp_t lflags = kmem_flags_convert(flags); - void *ptr; - - do { - ptr = kmalloc(size, lflags); - if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) - return ptr; - if (!(++retries % 100)) - xfs_err(NULL, - "possible memory allocation deadlock in %s (mode:0x%x)", - __func__, lflags); - congestion_wait(BLK_RW_ASYNC, HZ/50); - } while (1); -} - -void * -kmem_zalloc(size_t size, unsigned int __nocast flags) -{ - void *ptr; - - ptr = kmem_alloc(size, flags); - if (ptr) - memset((char *)ptr, 0, (int)size); - return ptr; -} - -void -kmem_free(const void *ptr) -{ - if (!is_vmalloc_addr(ptr)) { - kfree(ptr); - } else { - vfree(ptr); - } -} - -void * -kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, - unsigned int __nocast flags) -{ - void *new; - - new = kmem_alloc(newsize, flags); - if (ptr) { - if (new) - memcpy(new, ptr, - ((oldsize < newsize) ? oldsize : newsize)); - kmem_free(ptr); - } - return new; -} - -void * -kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) -{ - int retries = 0; - gfp_t lflags = kmem_flags_convert(flags); - void *ptr; - - do { - ptr = kmem_cache_alloc(zone, lflags); - if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) - return ptr; - if (!(++retries % 100)) - xfs_err(NULL, - "possible memory allocation deadlock in %s (mode:0x%x)", - __func__, lflags); - congestion_wait(BLK_RW_ASYNC, HZ/50); - } while (1); -} - -void * -kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags) -{ - void *ptr; - - ptr = kmem_zone_alloc(zone, flags); - if (ptr) - memset((char *)ptr, 0, kmem_cache_size(zone)); - return ptr; -} diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h deleted file mode 100644 index f7c8f7a..0000000 --- a/fs/xfs/linux-2.6/kmem.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPPORT_KMEM_H__ -#define __XFS_SUPPORT_KMEM_H__ - -#include -#include -#include -#include - -/* - * General memory allocation interfaces - */ - -#define KM_SLEEP 0x0001u -#define KM_NOSLEEP 0x0002u -#define KM_NOFS 0x0004u -#define KM_MAYFAIL 0x0008u - -/* - * We use a special process flag to avoid recursive callbacks into - * the filesystem during transactions. We will also issue our own - * warnings, so we explicitly skip any generic ones (silly of us). - */ -static inline gfp_t -kmem_flags_convert(unsigned int __nocast flags) -{ - gfp_t lflags; - - BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL)); - - if (flags & KM_NOSLEEP) { - lflags = GFP_ATOMIC | __GFP_NOWARN; - } else { - lflags = GFP_KERNEL | __GFP_NOWARN; - if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) - lflags &= ~__GFP_FS; - } - return lflags; -} - -extern void *kmem_alloc(size_t, unsigned int __nocast); -extern void *kmem_zalloc(size_t, unsigned int __nocast); -extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast); -extern void kmem_free(const void *); - -static inline void *kmem_zalloc_large(size_t size) -{ - void *ptr; - - ptr = vmalloc(size); - if (ptr) - memset(ptr, 0, size); - return ptr; -} -static inline void kmem_free_large(void *ptr) -{ - vfree(ptr); -} - -extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); - -/* - * Zone interfaces - */ - -#define KM_ZONE_HWALIGN SLAB_HWCACHE_ALIGN -#define KM_ZONE_RECLAIM SLAB_RECLAIM_ACCOUNT -#define KM_ZONE_SPREAD SLAB_MEM_SPREAD - -#define kmem_zone kmem_cache -#define kmem_zone_t struct kmem_cache - -static inline kmem_zone_t * -kmem_zone_init(int size, char *zone_name) -{ - return kmem_cache_create(zone_name, size, 0, 0, NULL); -} - -static inline kmem_zone_t * -kmem_zone_init_flags(int size, char *zone_name, unsigned long flags, - void (*construct)(void *)) -{ - return kmem_cache_create(zone_name, size, 0, flags, construct); -} - -static inline void -kmem_zone_free(kmem_zone_t *zone, void *ptr) -{ - kmem_cache_free(zone, ptr); -} - -static inline void -kmem_zone_destroy(kmem_zone_t *zone) -{ - if (zone) - kmem_cache_destroy(zone); -} - -extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); -extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); - -static inline int -kmem_shake_allow(gfp_t gfp_mask) -{ - return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)); -} - -#endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h deleted file mode 100644 index ff6a198..0000000 --- a/fs/xfs/linux-2.6/mrlock.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPPORT_MRLOCK_H__ -#define __XFS_SUPPORT_MRLOCK_H__ - -#include - -typedef struct { - struct rw_semaphore mr_lock; -#ifdef DEBUG - int mr_writer; -#endif -} mrlock_t; - -#ifdef DEBUG -#define mrinit(mrp, name) \ - do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0) -#else -#define mrinit(mrp, name) \ - do { init_rwsem(&(mrp)->mr_lock); } while (0) -#endif - -#define mrlock_init(mrp, t,n,s) mrinit(mrp, n) -#define mrfree(mrp) do { } while (0) - -static inline void mraccess_nested(mrlock_t *mrp, int subclass) -{ - down_read_nested(&mrp->mr_lock, subclass); -} - -static inline void mrupdate_nested(mrlock_t *mrp, int subclass) -{ - down_write_nested(&mrp->mr_lock, subclass); -#ifdef DEBUG - mrp->mr_writer = 1; -#endif -} - -static inline int mrtryaccess(mrlock_t *mrp) -{ - return down_read_trylock(&mrp->mr_lock); -} - -static inline int mrtryupdate(mrlock_t *mrp) -{ - if (!down_write_trylock(&mrp->mr_lock)) - return 0; -#ifdef DEBUG - mrp->mr_writer = 1; -#endif - return 1; -} - -static inline void mrunlock_excl(mrlock_t *mrp) -{ -#ifdef DEBUG - mrp->mr_writer = 0; -#endif - up_write(&mrp->mr_lock); -} - -static inline void mrunlock_shared(mrlock_t *mrp) -{ - up_read(&mrp->mr_lock); -} - -static inline void mrdemote(mrlock_t *mrp) -{ -#ifdef DEBUG - mrp->mr_writer = 0; -#endif - downgrade_write(&mrp->mr_lock); -} - -#endif /* __XFS_SUPPORT_MRLOCK_H__ */ diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/linux-2.6/time.h deleted file mode 100644 index 387e695..0000000 --- a/fs/xfs/linux-2.6/time.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPPORT_TIME_H__ -#define __XFS_SUPPORT_TIME_H__ - -#include -#include - -typedef struct timespec timespec_t; - -static inline void delay(long ticks) -{ - schedule_timeout_uninterruptible(ticks); -} - -static inline void nanotime(struct timespec *tvp) -{ - *tvp = CURRENT_TIME; -} - -#endif /* __XFS_SUPPORT_TIME_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c deleted file mode 100644 index b6c4b37..0000000 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ /dev/null @@ -1,420 +0,0 @@ -/* - * Copyright (c) 2008, Christoph Hellwig - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_acl.h" -#include "xfs_attr.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_vnodeops.h" -#include "xfs_trace.h" -#include -#include -#include - - -/* - * Locking scheme: - * - all ACL updates are protected by inode->i_mutex, which is taken before - * calling into this file. - */ - -STATIC struct posix_acl * -xfs_acl_from_disk(struct xfs_acl *aclp) -{ - struct posix_acl_entry *acl_e; - struct posix_acl *acl; - struct xfs_acl_entry *ace; - int count, i; - - count = be32_to_cpu(aclp->acl_cnt); - - acl = posix_acl_alloc(count, GFP_KERNEL); - if (!acl) - return ERR_PTR(-ENOMEM); - - for (i = 0; i < count; i++) { - acl_e = &acl->a_entries[i]; - ace = &aclp->acl_entry[i]; - - /* - * The tag is 32 bits on disk and 16 bits in core. - * - * Because every access to it goes through the core - * format first this is not a problem. - */ - acl_e->e_tag = be32_to_cpu(ace->ae_tag); - acl_e->e_perm = be16_to_cpu(ace->ae_perm); - - switch (acl_e->e_tag) { - case ACL_USER: - case ACL_GROUP: - acl_e->e_id = be32_to_cpu(ace->ae_id); - break; - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - acl_e->e_id = ACL_UNDEFINED_ID; - break; - default: - goto fail; - } - } - return acl; - -fail: - posix_acl_release(acl); - return ERR_PTR(-EINVAL); -} - -STATIC void -xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl) -{ - const struct posix_acl_entry *acl_e; - struct xfs_acl_entry *ace; - int i; - - aclp->acl_cnt = cpu_to_be32(acl->a_count); - for (i = 0; i < acl->a_count; i++) { - ace = &aclp->acl_entry[i]; - acl_e = &acl->a_entries[i]; - - ace->ae_tag = cpu_to_be32(acl_e->e_tag); - ace->ae_id = cpu_to_be32(acl_e->e_id); - ace->ae_perm = cpu_to_be16(acl_e->e_perm); - } -} - -struct posix_acl * -xfs_get_acl(struct inode *inode, int type) -{ - struct xfs_inode *ip = XFS_I(inode); - struct posix_acl *acl; - struct xfs_acl *xfs_acl; - int len = sizeof(struct xfs_acl); - unsigned char *ea_name; - int error; - - acl = get_cached_acl(inode, type); - if (acl != ACL_NOT_CACHED) - return acl; - - trace_xfs_get_acl(ip); - - switch (type) { - case ACL_TYPE_ACCESS: - ea_name = SGI_ACL_FILE; - break; - case ACL_TYPE_DEFAULT: - ea_name = SGI_ACL_DEFAULT; - break; - default: - BUG(); - } - - /* - * If we have a cached ACLs value just return it, not need to - * go out to the disk. - */ - - xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); - if (!xfs_acl) - return ERR_PTR(-ENOMEM); - - error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl, - &len, ATTR_ROOT); - if (error) { - /* - * If the attribute doesn't exist make sure we have a negative - * cache entry, for any other error assume it is transient and - * leave the cache entry as ACL_NOT_CACHED. - */ - if (error == -ENOATTR) { - acl = NULL; - goto out_update_cache; - } - goto out; - } - - acl = xfs_acl_from_disk(xfs_acl); - if (IS_ERR(acl)) - goto out; - - out_update_cache: - set_cached_acl(inode, type, acl); - out: - kfree(xfs_acl); - return acl; -} - -STATIC int -xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) -{ - struct xfs_inode *ip = XFS_I(inode); - unsigned char *ea_name; - int error; - - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - - switch (type) { - case ACL_TYPE_ACCESS: - ea_name = SGI_ACL_FILE; - break; - case ACL_TYPE_DEFAULT: - if (!S_ISDIR(inode->i_mode)) - return acl ? -EACCES : 0; - ea_name = SGI_ACL_DEFAULT; - break; - default: - return -EINVAL; - } - - if (acl) { - struct xfs_acl *xfs_acl; - int len; - - xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); - if (!xfs_acl) - return -ENOMEM; - - xfs_acl_to_disk(xfs_acl, acl); - len = sizeof(struct xfs_acl) - - (sizeof(struct xfs_acl_entry) * - (XFS_ACL_MAX_ENTRIES - acl->a_count)); - - error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl, - len, ATTR_ROOT); - - kfree(xfs_acl); - } else { - /* - * A NULL ACL argument means we want to remove the ACL. - */ - error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT); - - /* - * If the attribute didn't exist to start with that's fine. - */ - if (error == -ENOATTR) - error = 0; - } - - if (!error) - set_cached_acl(inode, type, acl); - return error; -} - -static int -xfs_set_mode(struct inode *inode, umode_t mode) -{ - int error = 0; - - if (mode != inode->i_mode) { - struct iattr iattr; - - iattr.ia_valid = ATTR_MODE | ATTR_CTIME; - iattr.ia_mode = mode; - iattr.ia_ctime = current_fs_time(inode->i_sb); - - error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL); - } - - return error; -} - -static int -xfs_acl_exists(struct inode *inode, unsigned char *name) -{ - int len = sizeof(struct xfs_acl); - - return (xfs_attr_get(XFS_I(inode), name, NULL, &len, - ATTR_ROOT|ATTR_KERNOVAL) == 0); -} - -int -posix_acl_access_exists(struct inode *inode) -{ - return xfs_acl_exists(inode, SGI_ACL_FILE); -} - -int -posix_acl_default_exists(struct inode *inode) -{ - if (!S_ISDIR(inode->i_mode)) - return 0; - return xfs_acl_exists(inode, SGI_ACL_DEFAULT); -} - -/* - * No need for i_mutex because the inode is not yet exposed to the VFS. - */ -int -xfs_inherit_acl(struct inode *inode, struct posix_acl *acl) -{ - umode_t mode = inode->i_mode; - int error = 0, inherit = 0; - - if (S_ISDIR(inode->i_mode)) { - error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, acl); - if (error) - goto out; - } - - error = posix_acl_create(&acl, GFP_KERNEL, &mode); - if (error < 0) - return error; - - /* - * If posix_acl_create returns a positive value we need to - * inherit a permission that can't be represented using the Unix - * mode bits and we actually need to set an ACL. - */ - if (error > 0) - inherit = 1; - - error = xfs_set_mode(inode, mode); - if (error) - goto out; - - if (inherit) - error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl); - -out: - posix_acl_release(acl); - return error; -} - -int -xfs_acl_chmod(struct inode *inode) -{ - struct posix_acl *acl; - int error; - - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - - acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl) || !acl) - return PTR_ERR(acl); - - error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); - if (error) - return error; - - error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl); - posix_acl_release(acl); - return error; -} - -static int -xfs_xattr_acl_get(struct dentry *dentry, const char *name, - void *value, size_t size, int type) -{ - struct posix_acl *acl; - int error; - - acl = xfs_get_acl(dentry->d_inode, type); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl == NULL) - return -ENODATA; - - error = posix_acl_to_xattr(acl, value, size); - posix_acl_release(acl); - - return error; -} - -static int -xfs_xattr_acl_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) -{ - struct inode *inode = dentry->d_inode; - struct posix_acl *acl = NULL; - int error = 0; - - if (flags & XATTR_CREATE) - return -EINVAL; - if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) - return value ? -EACCES : 0; - if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) - return -EPERM; - - if (!value) - goto set_acl; - - acl = posix_acl_from_xattr(value, size); - if (!acl) { - /* - * acl_set_file(3) may request that we set default ACLs with - * zero length -- defend (gracefully) against that here. - */ - goto out; - } - if (IS_ERR(acl)) { - error = PTR_ERR(acl); - goto out; - } - - error = posix_acl_valid(acl); - if (error) - goto out_release; - - error = -EINVAL; - if (acl->a_count > XFS_ACL_MAX_ENTRIES) - goto out_release; - - if (type == ACL_TYPE_ACCESS) { - umode_t mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); - - if (error <= 0) { - posix_acl_release(acl); - acl = NULL; - - if (error < 0) - return error; - } - - error = xfs_set_mode(inode, mode); - if (error) - goto out_release; - } - - set_acl: - error = xfs_set_acl(inode, type, acl); - out_release: - posix_acl_release(acl); - out: - return error; -} - -const struct xattr_handler xfs_xattr_acl_access_handler = { - .prefix = POSIX_ACL_XATTR_ACCESS, - .flags = ACL_TYPE_ACCESS, - .get = xfs_xattr_acl_get, - .set = xfs_xattr_acl_set, -}; - -const struct xattr_handler xfs_xattr_acl_default_handler = { - .prefix = POSIX_ACL_XATTR_DEFAULT, - .flags = ACL_TYPE_DEFAULT, - .get = xfs_xattr_acl_get, - .set = xfs_xattr_acl_set, -}; diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c deleted file mode 100644 index 63e971e..0000000 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ /dev/null @@ -1,1499 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_trans.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_alloc.h" -#include "xfs_error.h" -#include "xfs_rw.h" -#include "xfs_iomap.h" -#include "xfs_vnodeops.h" -#include "xfs_trace.h" -#include "xfs_bmap.h" -#include -#include -#include -#include - - -/* - * Prime number of hash buckets since address is used as the key. - */ -#define NVSYNC 37 -#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC]) -static wait_queue_head_t xfs_ioend_wq[NVSYNC]; - -void __init -xfs_ioend_init(void) -{ - int i; - - for (i = 0; i < NVSYNC; i++) - init_waitqueue_head(&xfs_ioend_wq[i]); -} - -void -xfs_ioend_wait( - xfs_inode_t *ip) -{ - wait_queue_head_t *wq = to_ioend_wq(ip); - - wait_event(*wq, (atomic_read(&ip->i_iocount) == 0)); -} - -STATIC void -xfs_ioend_wake( - xfs_inode_t *ip) -{ - if (atomic_dec_and_test(&ip->i_iocount)) - wake_up(to_ioend_wq(ip)); -} - -void -xfs_count_page_state( - struct page *page, - int *delalloc, - int *unwritten) -{ - struct buffer_head *bh, *head; - - *delalloc = *unwritten = 0; - - bh = head = page_buffers(page); - do { - if (buffer_unwritten(bh)) - (*unwritten) = 1; - else if (buffer_delay(bh)) - (*delalloc) = 1; - } while ((bh = bh->b_this_page) != head); -} - -STATIC struct block_device * -xfs_find_bdev_for_inode( - struct inode *inode) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - - if (XFS_IS_REALTIME_INODE(ip)) - return mp->m_rtdev_targp->bt_bdev; - else - return mp->m_ddev_targp->bt_bdev; -} - -/* - * We're now finished for good with this ioend structure. - * Update the page state via the associated buffer_heads, - * release holds on the inode and bio, and finally free - * up memory. Do not use the ioend after this. - */ -STATIC void -xfs_destroy_ioend( - xfs_ioend_t *ioend) -{ - struct buffer_head *bh, *next; - struct xfs_inode *ip = XFS_I(ioend->io_inode); - - for (bh = ioend->io_buffer_head; bh; bh = next) { - next = bh->b_private; - bh->b_end_io(bh, !ioend->io_error); - } - - /* - * Volume managers supporting multiple paths can send back ENODEV - * when the final path disappears. In this case continuing to fill - * the page cache with dirty data which cannot be written out is - * evil, so prevent that. - */ - if (unlikely(ioend->io_error == -ENODEV)) { - xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, - __FILE__, __LINE__); - } - - xfs_ioend_wake(ip); - mempool_free(ioend, xfs_ioend_pool); -} - -/* - * If the end of the current ioend is beyond the current EOF, - * return the new EOF value, otherwise zero. - */ -STATIC xfs_fsize_t -xfs_ioend_new_eof( - xfs_ioend_t *ioend) -{ - xfs_inode_t *ip = XFS_I(ioend->io_inode); - xfs_fsize_t isize; - xfs_fsize_t bsize; - - bsize = ioend->io_offset + ioend->io_size; - isize = MAX(ip->i_size, ip->i_new_size); - isize = MIN(isize, bsize); - return isize > ip->i_d.di_size ? isize : 0; -} - -/* - * Update on-disk file size now that data has been written to disk. The - * current in-memory file size is i_size. If a write is beyond eof i_new_size - * will be the intended file size until i_size is updated. If this write does - * not extend all the way to the valid file size then restrict this update to - * the end of the write. - * - * This function does not block as blocking on the inode lock in IO completion - * can lead to IO completion order dependency deadlocks.. If it can't get the - * inode ilock it will return EAGAIN. Callers must handle this. - */ -STATIC int -xfs_setfilesize( - xfs_ioend_t *ioend) -{ - xfs_inode_t *ip = XFS_I(ioend->io_inode); - xfs_fsize_t isize; - - if (unlikely(ioend->io_error)) - return 0; - - if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) - return EAGAIN; - - isize = xfs_ioend_new_eof(ioend); - if (isize) { - trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); - ip->i_d.di_size = isize; - xfs_mark_inode_dirty(ip); - } - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return 0; -} - -/* - * Schedule IO completion handling on the final put of an ioend. - */ -STATIC void -xfs_finish_ioend( - struct xfs_ioend *ioend) -{ - if (atomic_dec_and_test(&ioend->io_remaining)) { - if (ioend->io_type == IO_UNWRITTEN) - queue_work(xfsconvertd_workqueue, &ioend->io_work); - else - queue_work(xfsdatad_workqueue, &ioend->io_work); - } -} - -/* - * IO write completion. - */ -STATIC void -xfs_end_io( - struct work_struct *work) -{ - xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); - struct xfs_inode *ip = XFS_I(ioend->io_inode); - int error = 0; - - /* - * For unwritten extents we need to issue transactions to convert a - * range to normal written extens after the data I/O has finished. - */ - if (ioend->io_type == IO_UNWRITTEN && - likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) { - - error = xfs_iomap_write_unwritten(ip, ioend->io_offset, - ioend->io_size); - if (error) - ioend->io_error = error; - } - - /* - * We might have to update the on-disk file size after extending - * writes. - */ - error = xfs_setfilesize(ioend); - ASSERT(!error || error == EAGAIN); - - /* - * If we didn't complete processing of the ioend, requeue it to the - * tail of the workqueue for another attempt later. Otherwise destroy - * it. - */ - if (error == EAGAIN) { - atomic_inc(&ioend->io_remaining); - xfs_finish_ioend(ioend); - /* ensure we don't spin on blocked ioends */ - delay(1); - } else { - if (ioend->io_iocb) - aio_complete(ioend->io_iocb, ioend->io_result, 0); - xfs_destroy_ioend(ioend); - } -} - -/* - * Call IO completion handling in caller context on the final put of an ioend. - */ -STATIC void -xfs_finish_ioend_sync( - struct xfs_ioend *ioend) -{ - if (atomic_dec_and_test(&ioend->io_remaining)) - xfs_end_io(&ioend->io_work); -} - -/* - * Allocate and initialise an IO completion structure. - * We need to track unwritten extent write completion here initially. - * We'll need to extend this for updating the ondisk inode size later - * (vs. incore size). - */ -STATIC xfs_ioend_t * -xfs_alloc_ioend( - struct inode *inode, - unsigned int type) -{ - xfs_ioend_t *ioend; - - ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS); - - /* - * Set the count to 1 initially, which will prevent an I/O - * completion callback from happening before we have started - * all the I/O from calling the completion routine too early. - */ - atomic_set(&ioend->io_remaining, 1); - ioend->io_error = 0; - ioend->io_list = NULL; - ioend->io_type = type; - ioend->io_inode = inode; - ioend->io_buffer_head = NULL; - ioend->io_buffer_tail = NULL; - atomic_inc(&XFS_I(ioend->io_inode)->i_iocount); - ioend->io_offset = 0; - ioend->io_size = 0; - ioend->io_iocb = NULL; - ioend->io_result = 0; - - INIT_WORK(&ioend->io_work, xfs_end_io); - return ioend; -} - -STATIC int -xfs_map_blocks( - struct inode *inode, - loff_t offset, - struct xfs_bmbt_irec *imap, - int type, - int nonblocking) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - ssize_t count = 1 << inode->i_blkbits; - xfs_fileoff_t offset_fsb, end_fsb; - int error = 0; - int bmapi_flags = XFS_BMAPI_ENTIRE; - int nimaps = 1; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); - - if (type == IO_UNWRITTEN) - bmapi_flags |= XFS_BMAPI_IGSTATE; - - if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { - if (nonblocking) - return -XFS_ERROR(EAGAIN); - xfs_ilock(ip, XFS_ILOCK_SHARED); - } - - ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || - (ip->i_df.if_flags & XFS_IFEXTENTS)); - ASSERT(offset <= mp->m_maxioffset); - - if (offset + count > mp->m_maxioffset) - count = mp->m_maxioffset - offset; - end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); - offset_fsb = XFS_B_TO_FSBT(mp, offset); - error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb, - bmapi_flags, NULL, 0, imap, &nimaps, NULL); - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - if (error) - return -XFS_ERROR(error); - - if (type == IO_DELALLOC && - (!nimaps || isnullstartblock(imap->br_startblock))) { - error = xfs_iomap_write_allocate(ip, offset, count, imap); - if (!error) - trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); - return -XFS_ERROR(error); - } - -#ifdef DEBUG - if (type == IO_UNWRITTEN) { - ASSERT(nimaps); - ASSERT(imap->br_startblock != HOLESTARTBLOCK); - ASSERT(imap->br_startblock != DELAYSTARTBLOCK); - } -#endif - if (nimaps) - trace_xfs_map_blocks_found(ip, offset, count, type, imap); - return 0; -} - -STATIC int -xfs_imap_valid( - struct inode *inode, - struct xfs_bmbt_irec *imap, - xfs_off_t offset) -{ - offset >>= inode->i_blkbits; - - return offset >= imap->br_startoff && - offset < imap->br_startoff + imap->br_blockcount; -} - -/* - * BIO completion handler for buffered IO. - */ -STATIC void -xfs_end_bio( - struct bio *bio, - int error) -{ - xfs_ioend_t *ioend = bio->bi_private; - - ASSERT(atomic_read(&bio->bi_cnt) >= 1); - ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error; - - /* Toss bio and pass work off to an xfsdatad thread */ - bio->bi_private = NULL; - bio->bi_end_io = NULL; - bio_put(bio); - - xfs_finish_ioend(ioend); -} - -STATIC void -xfs_submit_ioend_bio( - struct writeback_control *wbc, - xfs_ioend_t *ioend, - struct bio *bio) -{ - atomic_inc(&ioend->io_remaining); - bio->bi_private = ioend; - bio->bi_end_io = xfs_end_bio; - - /* - * If the I/O is beyond EOF we mark the inode dirty immediately - * but don't update the inode size until I/O completion. - */ - if (xfs_ioend_new_eof(ioend)) - xfs_mark_inode_dirty(XFS_I(ioend->io_inode)); - - submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); -} - -STATIC struct bio * -xfs_alloc_ioend_bio( - struct buffer_head *bh) -{ - int nvecs = bio_get_nr_vecs(bh->b_bdev); - struct bio *bio = bio_alloc(GFP_NOIO, nvecs); - - ASSERT(bio->bi_private == NULL); - bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); - bio->bi_bdev = bh->b_bdev; - return bio; -} - -STATIC void -xfs_start_buffer_writeback( - struct buffer_head *bh) -{ - ASSERT(buffer_mapped(bh)); - ASSERT(buffer_locked(bh)); - ASSERT(!buffer_delay(bh)); - ASSERT(!buffer_unwritten(bh)); - - mark_buffer_async_write(bh); - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); -} - -STATIC void -xfs_start_page_writeback( - struct page *page, - int clear_dirty, - int buffers) -{ - ASSERT(PageLocked(page)); - ASSERT(!PageWriteback(page)); - if (clear_dirty) - clear_page_dirty_for_io(page); - set_page_writeback(page); - unlock_page(page); - /* If no buffers on the page are to be written, finish it here */ - if (!buffers) - end_page_writeback(page); -} - -static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh) -{ - return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); -} - -/* - * Submit all of the bios for all of the ioends we have saved up, covering the - * initial writepage page and also any probed pages. - * - * Because we may have multiple ioends spanning a page, we need to start - * writeback on all the buffers before we submit them for I/O. If we mark the - * buffers as we got, then we can end up with a page that only has buffers - * marked async write and I/O complete on can occur before we mark the other - * buffers async write. - * - * The end result of this is that we trip a bug in end_page_writeback() because - * we call it twice for the one page as the code in end_buffer_async_write() - * assumes that all buffers on the page are started at the same time. - * - * The fix is two passes across the ioend list - one to start writeback on the - * buffer_heads, and then submit them for I/O on the second pass. - */ -STATIC void -xfs_submit_ioend( - struct writeback_control *wbc, - xfs_ioend_t *ioend) -{ - xfs_ioend_t *head = ioend; - xfs_ioend_t *next; - struct buffer_head *bh; - struct bio *bio; - sector_t lastblock = 0; - - /* Pass 1 - start writeback */ - do { - next = ioend->io_list; - for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) - xfs_start_buffer_writeback(bh); - } while ((ioend = next) != NULL); - - /* Pass 2 - submit I/O */ - ioend = head; - do { - next = ioend->io_list; - bio = NULL; - - for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { - - if (!bio) { - retry: - bio = xfs_alloc_ioend_bio(bh); - } else if (bh->b_blocknr != lastblock + 1) { - xfs_submit_ioend_bio(wbc, ioend, bio); - goto retry; - } - - if (bio_add_buffer(bio, bh) != bh->b_size) { - xfs_submit_ioend_bio(wbc, ioend, bio); - goto retry; - } - - lastblock = bh->b_blocknr; - } - if (bio) - xfs_submit_ioend_bio(wbc, ioend, bio); - xfs_finish_ioend(ioend); - } while ((ioend = next) != NULL); -} - -/* - * Cancel submission of all buffer_heads so far in this endio. - * Toss the endio too. Only ever called for the initial page - * in a writepage request, so only ever one page. - */ -STATIC void -xfs_cancel_ioend( - xfs_ioend_t *ioend) -{ - xfs_ioend_t *next; - struct buffer_head *bh, *next_bh; - - do { - next = ioend->io_list; - bh = ioend->io_buffer_head; - do { - next_bh = bh->b_private; - clear_buffer_async_write(bh); - unlock_buffer(bh); - } while ((bh = next_bh) != NULL); - - xfs_ioend_wake(XFS_I(ioend->io_inode)); - mempool_free(ioend, xfs_ioend_pool); - } while ((ioend = next) != NULL); -} - -/* - * Test to see if we've been building up a completion structure for - * earlier buffers -- if so, we try to append to this ioend if we - * can, otherwise we finish off any current ioend and start another. - * Return true if we've finished the given ioend. - */ -STATIC void -xfs_add_to_ioend( - struct inode *inode, - struct buffer_head *bh, - xfs_off_t offset, - unsigned int type, - xfs_ioend_t **result, - int need_ioend) -{ - xfs_ioend_t *ioend = *result; - - if (!ioend || need_ioend || type != ioend->io_type) { - xfs_ioend_t *previous = *result; - - ioend = xfs_alloc_ioend(inode, type); - ioend->io_offset = offset; - ioend->io_buffer_head = bh; - ioend->io_buffer_tail = bh; - if (previous) - previous->io_list = ioend; - *result = ioend; - } else { - ioend->io_buffer_tail->b_private = bh; - ioend->io_buffer_tail = bh; - } - - bh->b_private = NULL; - ioend->io_size += bh->b_size; -} - -STATIC void -xfs_map_buffer( - struct inode *inode, - struct buffer_head *bh, - struct xfs_bmbt_irec *imap, - xfs_off_t offset) -{ - sector_t bn; - struct xfs_mount *m = XFS_I(inode)->i_mount; - xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff); - xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock); - - ASSERT(imap->br_startblock != HOLESTARTBLOCK); - ASSERT(imap->br_startblock != DELAYSTARTBLOCK); - - bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) + - ((offset - iomap_offset) >> inode->i_blkbits); - - ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode))); - - bh->b_blocknr = bn; - set_buffer_mapped(bh); -} - -STATIC void -xfs_map_at_offset( - struct inode *inode, - struct buffer_head *bh, - struct xfs_bmbt_irec *imap, - xfs_off_t offset) -{ - ASSERT(imap->br_startblock != HOLESTARTBLOCK); - ASSERT(imap->br_startblock != DELAYSTARTBLOCK); - - xfs_map_buffer(inode, bh, imap, offset); - set_buffer_mapped(bh); - clear_buffer_delay(bh); - clear_buffer_unwritten(bh); -} - -/* - * Test if a given page is suitable for writing as part of an unwritten - * or delayed allocate extent. - */ -STATIC int -xfs_is_delayed_page( - struct page *page, - unsigned int type) -{ - if (PageWriteback(page)) - return 0; - - if (page->mapping && page_has_buffers(page)) { - struct buffer_head *bh, *head; - int acceptable = 0; - - bh = head = page_buffers(page); - do { - if (buffer_unwritten(bh)) - acceptable = (type == IO_UNWRITTEN); - else if (buffer_delay(bh)) - acceptable = (type == IO_DELALLOC); - else if (buffer_dirty(bh) && buffer_mapped(bh)) - acceptable = (type == IO_OVERWRITE); - else - break; - } while ((bh = bh->b_this_page) != head); - - if (acceptable) - return 1; - } - - return 0; -} - -/* - * Allocate & map buffers for page given the extent map. Write it out. - * except for the original page of a writepage, this is called on - * delalloc/unwritten pages only, for the original page it is possible - * that the page has no mapping at all. - */ -STATIC int -xfs_convert_page( - struct inode *inode, - struct page *page, - loff_t tindex, - struct xfs_bmbt_irec *imap, - xfs_ioend_t **ioendp, - struct writeback_control *wbc) -{ - struct buffer_head *bh, *head; - xfs_off_t end_offset; - unsigned long p_offset; - unsigned int type; - int len, page_dirty; - int count = 0, done = 0, uptodate = 1; - xfs_off_t offset = page_offset(page); - - if (page->index != tindex) - goto fail; - if (!trylock_page(page)) - goto fail; - if (PageWriteback(page)) - goto fail_unlock_page; - if (page->mapping != inode->i_mapping) - goto fail_unlock_page; - if (!xfs_is_delayed_page(page, (*ioendp)->io_type)) - goto fail_unlock_page; - - /* - * page_dirty is initially a count of buffers on the page before - * EOF and is decremented as we move each into a cleanable state. - * - * Derivation: - * - * End offset is the highest offset that this page should represent. - * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1)) - * will evaluate non-zero and be less than PAGE_CACHE_SIZE and - * hence give us the correct page_dirty count. On any other page, - * it will be zero and in that case we need page_dirty to be the - * count of buffers on the page. - */ - end_offset = min_t(unsigned long long, - (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, - i_size_read(inode)); - - len = 1 << inode->i_blkbits; - p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1), - PAGE_CACHE_SIZE); - p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; - page_dirty = p_offset / len; - - bh = head = page_buffers(page); - do { - if (offset >= end_offset) - break; - if (!buffer_uptodate(bh)) - uptodate = 0; - if (!(PageUptodate(page) || buffer_uptodate(bh))) { - done = 1; - continue; - } - - if (buffer_unwritten(bh) || buffer_delay(bh) || - buffer_mapped(bh)) { - if (buffer_unwritten(bh)) - type = IO_UNWRITTEN; - else if (buffer_delay(bh)) - type = IO_DELALLOC; - else - type = IO_OVERWRITE; - - if (!xfs_imap_valid(inode, imap, offset)) { - done = 1; - continue; - } - - lock_buffer(bh); - if (type != IO_OVERWRITE) - xfs_map_at_offset(inode, bh, imap, offset); - xfs_add_to_ioend(inode, bh, offset, type, - ioendp, done); - - page_dirty--; - count++; - } else { - done = 1; - } - } while (offset += len, (bh = bh->b_this_page) != head); - - if (uptodate && bh == head) - SetPageUptodate(page); - - if (count) { - if (--wbc->nr_to_write <= 0 && - wbc->sync_mode == WB_SYNC_NONE) - done = 1; - } - xfs_start_page_writeback(page, !page_dirty, count); - - return done; - fail_unlock_page: - unlock_page(page); - fail: - return 1; -} - -/* - * Convert & write out a cluster of pages in the same extent as defined - * by mp and following the start page. - */ -STATIC void -xfs_cluster_write( - struct inode *inode, - pgoff_t tindex, - struct xfs_bmbt_irec *imap, - xfs_ioend_t **ioendp, - struct writeback_control *wbc, - pgoff_t tlast) -{ - struct pagevec pvec; - int done = 0, i; - - pagevec_init(&pvec, 0); - while (!done && tindex <= tlast) { - unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1); - - if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len)) - break; - - for (i = 0; i < pagevec_count(&pvec); i++) { - done = xfs_convert_page(inode, pvec.pages[i], tindex++, - imap, ioendp, wbc); - if (done) - break; - } - - pagevec_release(&pvec); - cond_resched(); - } -} - -STATIC void -xfs_vm_invalidatepage( - struct page *page, - unsigned long offset) -{ - trace_xfs_invalidatepage(page->mapping->host, page, offset); - block_invalidatepage(page, offset); -} - -/* - * If the page has delalloc buffers on it, we need to punch them out before we - * invalidate the page. If we don't, we leave a stale delalloc mapping on the - * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read - * is done on that same region - the delalloc extent is returned when none is - * supposed to be there. - * - * We prevent this by truncating away the delalloc regions on the page before - * invalidating it. Because they are delalloc, we can do this without needing a - * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this - * truncation without a transaction as there is no space left for block - * reservation (typically why we see a ENOSPC in writeback). - * - * This is not a performance critical path, so for now just do the punching a - * buffer head at a time. - */ -STATIC void -xfs_aops_discard_page( - struct page *page) -{ - struct inode *inode = page->mapping->host; - struct xfs_inode *ip = XFS_I(inode); - struct buffer_head *bh, *head; - loff_t offset = page_offset(page); - - if (!xfs_is_delayed_page(page, IO_DELALLOC)) - goto out_invalidate; - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - goto out_invalidate; - - xfs_alert(ip->i_mount, - "page discard on page %p, inode 0x%llx, offset %llu.", - page, ip->i_ino, offset); - - xfs_ilock(ip, XFS_ILOCK_EXCL); - bh = head = page_buffers(page); - do { - int error; - xfs_fileoff_t start_fsb; - - if (!buffer_delay(bh)) - goto next_buffer; - - start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); - error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1); - if (error) { - /* something screwed, just bail */ - if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - xfs_alert(ip->i_mount, - "page discard unable to remove delalloc mapping."); - } - break; - } -next_buffer: - offset += 1 << inode->i_blkbits; - - } while ((bh = bh->b_this_page) != head); - - xfs_iunlock(ip, XFS_ILOCK_EXCL); -out_invalidate: - xfs_vm_invalidatepage(page, 0); - return; -} - -/* - * Write out a dirty page. - * - * For delalloc space on the page we need to allocate space and flush it. - * For unwritten space on the page we need to start the conversion to - * regular allocated space. - * For any other dirty buffer heads on the page we should flush them. - */ -STATIC int -xfs_vm_writepage( - struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - struct buffer_head *bh, *head; - struct xfs_bmbt_irec imap; - xfs_ioend_t *ioend = NULL, *iohead = NULL; - loff_t offset; - unsigned int type; - __uint64_t end_offset; - pgoff_t end_index, last_index; - ssize_t len; - int err, imap_valid = 0, uptodate = 1; - int count = 0; - int nonblocking = 0; - - trace_xfs_writepage(inode, page, 0); - - ASSERT(page_has_buffers(page)); - - /* - * Refuse to write the page out if we are called from reclaim context. - * - * This avoids stack overflows when called from deeply used stacks in - * random callers for direct reclaim or memcg reclaim. We explicitly - * allow reclaim from kswapd as the stack usage there is relatively low. - * - * This should really be done by the core VM, but until that happens - * filesystems like XFS, btrfs and ext4 have to take care of this - * by themselves. - */ - if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC) - goto redirty; - - /* - * Given that we do not allow direct reclaim to call us, we should - * never be called while in a filesystem transaction. - */ - if (WARN_ON(current->flags & PF_FSTRANS)) - goto redirty; - - /* Is this page beyond the end of the file? */ - offset = i_size_read(inode); - end_index = offset >> PAGE_CACHE_SHIFT; - last_index = (offset - 1) >> PAGE_CACHE_SHIFT; - if (page->index >= end_index) { - if ((page->index >= end_index + 1) || - !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { - unlock_page(page); - return 0; - } - } - - end_offset = min_t(unsigned long long, - (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, - offset); - len = 1 << inode->i_blkbits; - - bh = head = page_buffers(page); - offset = page_offset(page); - type = IO_OVERWRITE; - - if (wbc->sync_mode == WB_SYNC_NONE) - nonblocking = 1; - - do { - int new_ioend = 0; - - if (offset >= end_offset) - break; - if (!buffer_uptodate(bh)) - uptodate = 0; - - /* - * set_page_dirty dirties all buffers in a page, independent - * of their state. The dirty state however is entirely - * meaningless for holes (!mapped && uptodate), so skip - * buffers covering holes here. - */ - if (!buffer_mapped(bh) && buffer_uptodate(bh)) { - imap_valid = 0; - continue; - } - - if (buffer_unwritten(bh)) { - if (type != IO_UNWRITTEN) { - type = IO_UNWRITTEN; - imap_valid = 0; - } - } else if (buffer_delay(bh)) { - if (type != IO_DELALLOC) { - type = IO_DELALLOC; - imap_valid = 0; - } - } else if (buffer_uptodate(bh)) { - if (type != IO_OVERWRITE) { - type = IO_OVERWRITE; - imap_valid = 0; - } - } else { - if (PageUptodate(page)) { - ASSERT(buffer_mapped(bh)); - imap_valid = 0; - } - continue; - } - - if (imap_valid) - imap_valid = xfs_imap_valid(inode, &imap, offset); - if (!imap_valid) { - /* - * If we didn't have a valid mapping then we need to - * put the new mapping into a separate ioend structure. - * This ensures non-contiguous extents always have - * separate ioends, which is particularly important - * for unwritten extent conversion at I/O completion - * time. - */ - new_ioend = 1; - err = xfs_map_blocks(inode, offset, &imap, type, - nonblocking); - if (err) - goto error; - imap_valid = xfs_imap_valid(inode, &imap, offset); - } - if (imap_valid) { - lock_buffer(bh); - if (type != IO_OVERWRITE) - xfs_map_at_offset(inode, bh, &imap, offset); - xfs_add_to_ioend(inode, bh, offset, type, &ioend, - new_ioend); - count++; - } - - if (!iohead) - iohead = ioend; - - } while (offset += len, ((bh = bh->b_this_page) != head)); - - if (uptodate && bh == head) - SetPageUptodate(page); - - xfs_start_page_writeback(page, 1, count); - - if (ioend && imap_valid) { - xfs_off_t end_index; - - end_index = imap.br_startoff + imap.br_blockcount; - - /* to bytes */ - end_index <<= inode->i_blkbits; - - /* to pages */ - end_index = (end_index - 1) >> PAGE_CACHE_SHIFT; - - /* check against file size */ - if (end_index > last_index) - end_index = last_index; - - xfs_cluster_write(inode, page->index + 1, &imap, &ioend, - wbc, end_index); - } - - if (iohead) - xfs_submit_ioend(wbc, iohead); - - return 0; - -error: - if (iohead) - xfs_cancel_ioend(iohead); - - if (err == -EAGAIN) - goto redirty; - - xfs_aops_discard_page(page); - ClearPageUptodate(page); - unlock_page(page); - return err; - -redirty: - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return 0; -} - -STATIC int -xfs_vm_writepages( - struct address_space *mapping, - struct writeback_control *wbc) -{ - xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); - return generic_writepages(mapping, wbc); -} - -/* - * Called to move a page into cleanable state - and from there - * to be released. The page should already be clean. We always - * have buffer heads in this call. - * - * Returns 1 if the page is ok to release, 0 otherwise. - */ -STATIC int -xfs_vm_releasepage( - struct page *page, - gfp_t gfp_mask) -{ - int delalloc, unwritten; - - trace_xfs_releasepage(page->mapping->host, page, 0); - - xfs_count_page_state(page, &delalloc, &unwritten); - - if (WARN_ON(delalloc)) - return 0; - if (WARN_ON(unwritten)) - return 0; - - return try_to_free_buffers(page); -} - -STATIC int -__xfs_get_blocks( - struct inode *inode, - sector_t iblock, - struct buffer_head *bh_result, - int create, - int direct) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t offset_fsb, end_fsb; - int error = 0; - int lockmode = 0; - struct xfs_bmbt_irec imap; - int nimaps = 1; - xfs_off_t offset; - ssize_t size; - int new = 0; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); - - offset = (xfs_off_t)iblock << inode->i_blkbits; - ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); - size = bh_result->b_size; - - if (!create && direct && offset >= i_size_read(inode)) - return 0; - - if (create) { - lockmode = XFS_ILOCK_EXCL; - xfs_ilock(ip, lockmode); - } else { - lockmode = xfs_ilock_map_shared(ip); - } - - ASSERT(offset <= mp->m_maxioffset); - if (offset + size > mp->m_maxioffset) - size = mp->m_maxioffset - offset; - end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); - offset_fsb = XFS_B_TO_FSBT(mp, offset); - - error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb, - XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL); - if (error) - goto out_unlock; - - if (create && - (!nimaps || - (imap.br_startblock == HOLESTARTBLOCK || - imap.br_startblock == DELAYSTARTBLOCK))) { - if (direct) { - error = xfs_iomap_write_direct(ip, offset, size, - &imap, nimaps); - } else { - error = xfs_iomap_write_delay(ip, offset, size, &imap); - } - if (error) - goto out_unlock; - - trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap); - } else if (nimaps) { - trace_xfs_get_blocks_found(ip, offset, size, 0, &imap); - } else { - trace_xfs_get_blocks_notfound(ip, offset, size); - goto out_unlock; - } - xfs_iunlock(ip, lockmode); - - if (imap.br_startblock != HOLESTARTBLOCK && - imap.br_startblock != DELAYSTARTBLOCK) { - /* - * For unwritten extents do not report a disk address on - * the read case (treat as if we're reading into a hole). - */ - if (create || !ISUNWRITTEN(&imap)) - xfs_map_buffer(inode, bh_result, &imap, offset); - if (create && ISUNWRITTEN(&imap)) { - if (direct) - bh_result->b_private = inode; - set_buffer_unwritten(bh_result); - } - } - - /* - * If this is a realtime file, data may be on a different device. - * to that pointed to from the buffer_head b_bdev currently. - */ - bh_result->b_bdev = xfs_find_bdev_for_inode(inode); - - /* - * If we previously allocated a block out beyond eof and we are now - * coming back to use it then we will need to flag it as new even if it - * has a disk address. - * - * With sub-block writes into unwritten extents we also need to mark - * the buffer as new so that the unwritten parts of the buffer gets - * correctly zeroed. - */ - if (create && - ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || - (offset >= i_size_read(inode)) || - (new || ISUNWRITTEN(&imap)))) - set_buffer_new(bh_result); - - if (imap.br_startblock == DELAYSTARTBLOCK) { - BUG_ON(direct); - if (create) { - set_buffer_uptodate(bh_result); - set_buffer_mapped(bh_result); - set_buffer_delay(bh_result); - } - } - - /* - * If this is O_DIRECT or the mpage code calling tell them how large - * the mapping is, so that we can avoid repeated get_blocks calls. - */ - if (direct || size > (1 << inode->i_blkbits)) { - xfs_off_t mapping_size; - - mapping_size = imap.br_startoff + imap.br_blockcount - iblock; - mapping_size <<= inode->i_blkbits; - - ASSERT(mapping_size > 0); - if (mapping_size > size) - mapping_size = size; - if (mapping_size > LONG_MAX) - mapping_size = LONG_MAX; - - bh_result->b_size = mapping_size; - } - - return 0; - -out_unlock: - xfs_iunlock(ip, lockmode); - return -error; -} - -int -xfs_get_blocks( - struct inode *inode, - sector_t iblock, - struct buffer_head *bh_result, - int create) -{ - return __xfs_get_blocks(inode, iblock, bh_result, create, 0); -} - -STATIC int -xfs_get_blocks_direct( - struct inode *inode, - sector_t iblock, - struct buffer_head *bh_result, - int create) -{ - return __xfs_get_blocks(inode, iblock, bh_result, create, 1); -} - -/* - * Complete a direct I/O write request. - * - * If the private argument is non-NULL __xfs_get_blocks signals us that we - * need to issue a transaction to convert the range from unwritten to written - * extents. In case this is regular synchronous I/O we just call xfs_end_io - * to do this and we are done. But in case this was a successful AIO - * request this handler is called from interrupt context, from which we - * can't start transactions. In that case offload the I/O completion to - * the workqueues we also use for buffered I/O completion. - */ -STATIC void -xfs_end_io_direct_write( - struct kiocb *iocb, - loff_t offset, - ssize_t size, - void *private, - int ret, - bool is_async) -{ - struct xfs_ioend *ioend = iocb->private; - - /* - * blockdev_direct_IO can return an error even after the I/O - * completion handler was called. Thus we need to protect - * against double-freeing. - */ - iocb->private = NULL; - - ioend->io_offset = offset; - ioend->io_size = size; - if (private && size > 0) - ioend->io_type = IO_UNWRITTEN; - - if (is_async) { - /* - * If we are converting an unwritten extent we need to delay - * the AIO completion until after the unwrittent extent - * conversion has completed, otherwise do it ASAP. - */ - if (ioend->io_type == IO_UNWRITTEN) { - ioend->io_iocb = iocb; - ioend->io_result = ret; - } else { - aio_complete(iocb, ret, 0); - } - xfs_finish_ioend(ioend); - } else { - xfs_finish_ioend_sync(ioend); - } - - /* XXX: probably should move into the real I/O completion handler */ - inode_dio_done(ioend->io_inode); -} - -STATIC ssize_t -xfs_vm_direct_IO( - int rw, - struct kiocb *iocb, - const struct iovec *iov, - loff_t offset, - unsigned long nr_segs) -{ - struct inode *inode = iocb->ki_filp->f_mapping->host; - struct block_device *bdev = xfs_find_bdev_for_inode(inode); - ssize_t ret; - - if (rw & WRITE) { - iocb->private = xfs_alloc_ioend(inode, IO_DIRECT); - - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, - offset, nr_segs, - xfs_get_blocks_direct, - xfs_end_io_direct_write, NULL, 0); - if (ret != -EIOCBQUEUED && iocb->private) - xfs_destroy_ioend(iocb->private); - } else { - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, - offset, nr_segs, - xfs_get_blocks_direct, - NULL, NULL, 0); - } - - return ret; -} - -STATIC void -xfs_vm_write_failed( - struct address_space *mapping, - loff_t to) -{ - struct inode *inode = mapping->host; - - if (to > inode->i_size) { - /* - * punch out the delalloc blocks we have already allocated. We - * don't call xfs_setattr() to do this as we may be in the - * middle of a multi-iovec write and so the vfs inode->i_size - * will not match the xfs ip->i_size and so it will zero too - * much. Hence we jus truncate the page cache to zero what is - * necessary and punch the delalloc blocks directly. - */ - struct xfs_inode *ip = XFS_I(inode); - xfs_fileoff_t start_fsb; - xfs_fileoff_t end_fsb; - int error; - - truncate_pagecache(inode, to, inode->i_size); - - /* - * Check if there are any blocks that are outside of i_size - * that need to be trimmed back. - */ - start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1; - end_fsb = XFS_B_TO_FSB(ip->i_mount, to); - if (end_fsb <= start_fsb) - return; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - error = xfs_bmap_punch_delalloc_range(ip, start_fsb, - end_fsb - start_fsb); - if (error) { - /* something screwed, just bail */ - if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - xfs_alert(ip->i_mount, - "xfs_vm_write_failed: unable to clean up ino %lld", - ip->i_ino); - } - } - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } -} - -STATIC int -xfs_vm_write_begin( - struct file *file, - struct address_space *mapping, - loff_t pos, - unsigned len, - unsigned flags, - struct page **pagep, - void **fsdata) -{ - int ret; - - ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS, - pagep, xfs_get_blocks); - if (unlikely(ret)) - xfs_vm_write_failed(mapping, pos + len); - return ret; -} - -STATIC int -xfs_vm_write_end( - struct file *file, - struct address_space *mapping, - loff_t pos, - unsigned len, - unsigned copied, - struct page *page, - void *fsdata) -{ - int ret; - - ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); - if (unlikely(ret < len)) - xfs_vm_write_failed(mapping, pos + len); - return ret; -} - -STATIC sector_t -xfs_vm_bmap( - struct address_space *mapping, - sector_t block) -{ - struct inode *inode = (struct inode *)mapping->host; - struct xfs_inode *ip = XFS_I(inode); - - trace_xfs_vm_bmap(XFS_I(inode)); - xfs_ilock(ip, XFS_IOLOCK_SHARED); - xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); - xfs_iunlock(ip, XFS_IOLOCK_SHARED); - return generic_block_bmap(mapping, block, xfs_get_blocks); -} - -STATIC int -xfs_vm_readpage( - struct file *unused, - struct page *page) -{ - return mpage_readpage(page, xfs_get_blocks); -} - -STATIC int -xfs_vm_readpages( - struct file *unused, - struct address_space *mapping, - struct list_head *pages, - unsigned nr_pages) -{ - return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); -} - -const struct address_space_operations xfs_address_space_operations = { - .readpage = xfs_vm_readpage, - .readpages = xfs_vm_readpages, - .writepage = xfs_vm_writepage, - .writepages = xfs_vm_writepages, - .releasepage = xfs_vm_releasepage, - .invalidatepage = xfs_vm_invalidatepage, - .write_begin = xfs_vm_write_begin, - .write_end = xfs_vm_write_end, - .bmap = xfs_vm_bmap, - .direct_IO = xfs_vm_direct_IO, - .migratepage = buffer_migrate_page, - .is_partially_uptodate = block_is_partially_uptodate, - .error_remove_page = generic_error_remove_page, -}; diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h deleted file mode 100644 index 71f721e..0000000 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2005-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_AOPS_H__ -#define __XFS_AOPS_H__ - -extern struct workqueue_struct *xfsdatad_workqueue; -extern struct workqueue_struct *xfsconvertd_workqueue; -extern mempool_t *xfs_ioend_pool; - -/* - * Types of I/O for bmap clustering and I/O completion tracking. - */ -enum { - IO_DIRECT = 0, /* special case for direct I/O ioends */ - IO_DELALLOC, /* mapping covers delalloc region */ - IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */ - IO_OVERWRITE, /* mapping covers already allocated extent */ -}; - -#define XFS_IO_TYPES \ - { 0, "" }, \ - { IO_DELALLOC, "delalloc" }, \ - { IO_UNWRITTEN, "unwritten" }, \ - { IO_OVERWRITE, "overwrite" } - -/* - * xfs_ioend struct manages large extent writes for XFS. - * It can manage several multi-page bio's at once. - */ -typedef struct xfs_ioend { - struct xfs_ioend *io_list; /* next ioend in chain */ - unsigned int io_type; /* delalloc / unwritten */ - int io_error; /* I/O error code */ - atomic_t io_remaining; /* hold count */ - struct inode *io_inode; /* file being written to */ - struct buffer_head *io_buffer_head;/* buffer linked list head */ - struct buffer_head *io_buffer_tail;/* buffer linked list tail */ - size_t io_size; /* size of the extent */ - xfs_off_t io_offset; /* offset in the file */ - struct work_struct io_work; /* xfsdatad work queue */ - struct kiocb *io_iocb; - int io_result; -} xfs_ioend_t; - -extern const struct address_space_operations xfs_address_space_operations; -extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); - -extern void xfs_ioend_init(void); -extern void xfs_ioend_wait(struct xfs_inode *); - -extern void xfs_count_page_state(struct page *, int *, int *); - -#endif /* __XFS_AOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c deleted file mode 100644 index c57836d..0000000 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ /dev/null @@ -1,1876 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xfs_sb.h" -#include "xfs_inum.h" -#include "xfs_log.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_trace.h" - -static kmem_zone_t *xfs_buf_zone; -STATIC int xfsbufd(void *); -STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); - -static struct workqueue_struct *xfslogd_workqueue; -struct workqueue_struct *xfsdatad_workqueue; -struct workqueue_struct *xfsconvertd_workqueue; - -#ifdef XFS_BUF_LOCK_TRACKING -# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) -# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1) -# define XB_GET_OWNER(bp) ((bp)->b_last_holder) -#else -# define XB_SET_OWNER(bp) do { } while (0) -# define XB_CLEAR_OWNER(bp) do { } while (0) -# define XB_GET_OWNER(bp) do { } while (0) -#endif - -#define xb_to_gfp(flags) \ - ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \ - ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN) - -#define xb_to_km(flags) \ - (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) - -#define xfs_buf_allocate(flags) \ - kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags)) -#define xfs_buf_deallocate(bp) \ - kmem_zone_free(xfs_buf_zone, (bp)); - -static inline int -xfs_buf_is_vmapped( - struct xfs_buf *bp) -{ - /* - * Return true if the buffer is vmapped. - * - * The XBF_MAPPED flag is set if the buffer should be mapped, but the - * code is clever enough to know it doesn't have to map a single page, - * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1. - */ - return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1; -} - -static inline int -xfs_buf_vmap_len( - struct xfs_buf *bp) -{ - return (bp->b_page_count * PAGE_SIZE) - bp->b_offset; -} - -/* - * xfs_buf_lru_add - add a buffer to the LRU. - * - * The LRU takes a new reference to the buffer so that it will only be freed - * once the shrinker takes the buffer off the LRU. - */ -STATIC void -xfs_buf_lru_add( - struct xfs_buf *bp) -{ - struct xfs_buftarg *btp = bp->b_target; - - spin_lock(&btp->bt_lru_lock); - if (list_empty(&bp->b_lru)) { - atomic_inc(&bp->b_hold); - list_add_tail(&bp->b_lru, &btp->bt_lru); - btp->bt_lru_nr++; - } - spin_unlock(&btp->bt_lru_lock); -} - -/* - * xfs_buf_lru_del - remove a buffer from the LRU - * - * The unlocked check is safe here because it only occurs when there are not - * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there - * to optimise the shrinker removing the buffer from the LRU and calling - * xfs_buf_free(). i.e. it removes an unnecessary round trip on the - * bt_lru_lock. - */ -STATIC void -xfs_buf_lru_del( - struct xfs_buf *bp) -{ - struct xfs_buftarg *btp = bp->b_target; - - if (list_empty(&bp->b_lru)) - return; - - spin_lock(&btp->bt_lru_lock); - if (!list_empty(&bp->b_lru)) { - list_del_init(&bp->b_lru); - btp->bt_lru_nr--; - } - spin_unlock(&btp->bt_lru_lock); -} - -/* - * When we mark a buffer stale, we remove the buffer from the LRU and clear the - * b_lru_ref count so that the buffer is freed immediately when the buffer - * reference count falls to zero. If the buffer is already on the LRU, we need - * to remove the reference that LRU holds on the buffer. - * - * This prevents build-up of stale buffers on the LRU. - */ -void -xfs_buf_stale( - struct xfs_buf *bp) -{ - bp->b_flags |= XBF_STALE; - atomic_set(&(bp)->b_lru_ref, 0); - if (!list_empty(&bp->b_lru)) { - struct xfs_buftarg *btp = bp->b_target; - - spin_lock(&btp->bt_lru_lock); - if (!list_empty(&bp->b_lru)) { - list_del_init(&bp->b_lru); - btp->bt_lru_nr--; - atomic_dec(&bp->b_hold); - } - spin_unlock(&btp->bt_lru_lock); - } - ASSERT(atomic_read(&bp->b_hold) >= 1); -} - -STATIC void -_xfs_buf_initialize( - xfs_buf_t *bp, - xfs_buftarg_t *target, - xfs_off_t range_base, - size_t range_length, - xfs_buf_flags_t flags) -{ - /* - * We don't want certain flags to appear in b_flags. - */ - flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD); - - memset(bp, 0, sizeof(xfs_buf_t)); - atomic_set(&bp->b_hold, 1); - atomic_set(&bp->b_lru_ref, 1); - init_completion(&bp->b_iowait); - INIT_LIST_HEAD(&bp->b_lru); - INIT_LIST_HEAD(&bp->b_list); - RB_CLEAR_NODE(&bp->b_rbnode); - sema_init(&bp->b_sema, 0); /* held, no waiters */ - XB_SET_OWNER(bp); - bp->b_target = target; - bp->b_file_offset = range_base; - /* - * Set buffer_length and count_desired to the same value initially. - * I/O routines should use count_desired, which will be the same in - * most cases but may be reset (e.g. XFS recovery). - */ - bp->b_buffer_length = bp->b_count_desired = range_length; - bp->b_flags = flags; - bp->b_bn = XFS_BUF_DADDR_NULL; - atomic_set(&bp->b_pin_count, 0); - init_waitqueue_head(&bp->b_waiters); - - XFS_STATS_INC(xb_create); - - trace_xfs_buf_init(bp, _RET_IP_); -} - -/* - * Allocate a page array capable of holding a specified number - * of pages, and point the page buf at it. - */ -STATIC int -_xfs_buf_get_pages( - xfs_buf_t *bp, - int page_count, - xfs_buf_flags_t flags) -{ - /* Make sure that we have a page list */ - if (bp->b_pages == NULL) { - bp->b_offset = xfs_buf_poff(bp->b_file_offset); - bp->b_page_count = page_count; - if (page_count <= XB_PAGES) { - bp->b_pages = bp->b_page_array; - } else { - bp->b_pages = kmem_alloc(sizeof(struct page *) * - page_count, xb_to_km(flags)); - if (bp->b_pages == NULL) - return -ENOMEM; - } - memset(bp->b_pages, 0, sizeof(struct page *) * page_count); - } - return 0; -} - -/* - * Frees b_pages if it was allocated. - */ -STATIC void -_xfs_buf_free_pages( - xfs_buf_t *bp) -{ - if (bp->b_pages != bp->b_page_array) { - kmem_free(bp->b_pages); - bp->b_pages = NULL; - } -} - -/* - * Releases the specified buffer. - * - * The modification state of any associated pages is left unchanged. - * The buffer most not be on any hash - use xfs_buf_rele instead for - * hashed and refcounted buffers - */ -void -xfs_buf_free( - xfs_buf_t *bp) -{ - trace_xfs_buf_free(bp, _RET_IP_); - - ASSERT(list_empty(&bp->b_lru)); - - if (bp->b_flags & _XBF_PAGES) { - uint i; - - if (xfs_buf_is_vmapped(bp)) - vm_unmap_ram(bp->b_addr - bp->b_offset, - bp->b_page_count); - - for (i = 0; i < bp->b_page_count; i++) { - struct page *page = bp->b_pages[i]; - - __free_page(page); - } - } else if (bp->b_flags & _XBF_KMEM) - kmem_free(bp->b_addr); - _xfs_buf_free_pages(bp); - xfs_buf_deallocate(bp); -} - -/* - * Allocates all the pages for buffer in question and builds it's page list. - */ -STATIC int -xfs_buf_allocate_memory( - xfs_buf_t *bp, - uint flags) -{ - size_t size = bp->b_count_desired; - size_t nbytes, offset; - gfp_t gfp_mask = xb_to_gfp(flags); - unsigned short page_count, i; - xfs_off_t end; - int error; - - /* - * for buffers that are contained within a single page, just allocate - * the memory from the heap - there's no need for the complexity of - * page arrays to keep allocation down to order 0. - */ - if (bp->b_buffer_length < PAGE_SIZE) { - bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags)); - if (!bp->b_addr) { - /* low memory - use alloc_page loop instead */ - goto use_alloc_page; - } - - if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) & - PAGE_MASK) != - ((unsigned long)bp->b_addr & PAGE_MASK)) { - /* b_addr spans two pages - use alloc_page instead */ - kmem_free(bp->b_addr); - bp->b_addr = NULL; - goto use_alloc_page; - } - bp->b_offset = offset_in_page(bp->b_addr); - bp->b_pages = bp->b_page_array; - bp->b_pages[0] = virt_to_page(bp->b_addr); - bp->b_page_count = 1; - bp->b_flags |= XBF_MAPPED | _XBF_KMEM; - return 0; - } - -use_alloc_page: - end = bp->b_file_offset + bp->b_buffer_length; - page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset); - error = _xfs_buf_get_pages(bp, page_count, flags); - if (unlikely(error)) - return error; - - offset = bp->b_offset; - bp->b_flags |= _XBF_PAGES; - - for (i = 0; i < bp->b_page_count; i++) { - struct page *page; - uint retries = 0; -retry: - page = alloc_page(gfp_mask); - if (unlikely(page == NULL)) { - if (flags & XBF_READ_AHEAD) { - bp->b_page_count = i; - error = ENOMEM; - goto out_free_pages; - } - - /* - * This could deadlock. - * - * But until all the XFS lowlevel code is revamped to - * handle buffer allocation failures we can't do much. - */ - if (!(++retries % 100)) - xfs_err(NULL, - "possible memory allocation deadlock in %s (mode:0x%x)", - __func__, gfp_mask); - - XFS_STATS_INC(xb_page_retries); - congestion_wait(BLK_RW_ASYNC, HZ/50); - goto retry; - } - - XFS_STATS_INC(xb_page_found); - - nbytes = min_t(size_t, size, PAGE_SIZE - offset); - size -= nbytes; - bp->b_pages[i] = page; - offset = 0; - } - return 0; - -out_free_pages: - for (i = 0; i < bp->b_page_count; i++) - __free_page(bp->b_pages[i]); - return error; -} - -/* - * Map buffer into kernel address-space if necessary. - */ -STATIC int -_xfs_buf_map_pages( - xfs_buf_t *bp, - uint flags) -{ - ASSERT(bp->b_flags & _XBF_PAGES); - if (bp->b_page_count == 1) { - /* A single page buffer is always mappable */ - bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; - bp->b_flags |= XBF_MAPPED; - } else if (flags & XBF_MAPPED) { - int retried = 0; - - do { - bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, - -1, PAGE_KERNEL); - if (bp->b_addr) - break; - vm_unmap_aliases(); - } while (retried++ <= 1); - - if (!bp->b_addr) - return -ENOMEM; - bp->b_addr += bp->b_offset; - bp->b_flags |= XBF_MAPPED; - } - - return 0; -} - -/* - * Finding and Reading Buffers - */ - -/* - * Look up, and creates if absent, a lockable buffer for - * a given range of an inode. The buffer is returned - * locked. If other overlapping buffers exist, they are - * released before the new buffer is created and locked, - * which may imply that this call will block until those buffers - * are unlocked. No I/O is implied by this call. - */ -xfs_buf_t * -_xfs_buf_find( - xfs_buftarg_t *btp, /* block device target */ - xfs_off_t ioff, /* starting offset of range */ - size_t isize, /* length of range */ - xfs_buf_flags_t flags, - xfs_buf_t *new_bp) -{ - xfs_off_t range_base; - size_t range_length; - struct xfs_perag *pag; - struct rb_node **rbp; - struct rb_node *parent; - xfs_buf_t *bp; - - range_base = (ioff << BBSHIFT); - range_length = (isize << BBSHIFT); - - /* Check for IOs smaller than the sector size / not sector aligned */ - ASSERT(!(range_length < (1 << btp->bt_sshift))); - ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); - - /* get tree root */ - pag = xfs_perag_get(btp->bt_mount, - xfs_daddr_to_agno(btp->bt_mount, ioff)); - - /* walk tree */ - spin_lock(&pag->pag_buf_lock); - rbp = &pag->pag_buf_tree.rb_node; - parent = NULL; - bp = NULL; - while (*rbp) { - parent = *rbp; - bp = rb_entry(parent, struct xfs_buf, b_rbnode); - - if (range_base < bp->b_file_offset) - rbp = &(*rbp)->rb_left; - else if (range_base > bp->b_file_offset) - rbp = &(*rbp)->rb_right; - else { - /* - * found a block offset match. If the range doesn't - * match, the only way this is allowed is if the buffer - * in the cache is stale and the transaction that made - * it stale has not yet committed. i.e. we are - * reallocating a busy extent. Skip this buffer and - * continue searching to the right for an exact match. - */ - if (bp->b_buffer_length != range_length) { - ASSERT(bp->b_flags & XBF_STALE); - rbp = &(*rbp)->rb_right; - continue; - } - atomic_inc(&bp->b_hold); - goto found; - } - } - - /* No match found */ - if (new_bp) { - _xfs_buf_initialize(new_bp, btp, range_base, - range_length, flags); - rb_link_node(&new_bp->b_rbnode, parent, rbp); - rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree); - /* the buffer keeps the perag reference until it is freed */ - new_bp->b_pag = pag; - spin_unlock(&pag->pag_buf_lock); - } else { - XFS_STATS_INC(xb_miss_locked); - spin_unlock(&pag->pag_buf_lock); - xfs_perag_put(pag); - } - return new_bp; - -found: - spin_unlock(&pag->pag_buf_lock); - xfs_perag_put(pag); - - if (!xfs_buf_trylock(bp)) { - if (flags & XBF_TRYLOCK) { - xfs_buf_rele(bp); - XFS_STATS_INC(xb_busy_locked); - return NULL; - } - xfs_buf_lock(bp); - XFS_STATS_INC(xb_get_locked_waited); - } - - /* - * if the buffer is stale, clear all the external state associated with - * it. We need to keep flags such as how we allocated the buffer memory - * intact here. - */ - if (bp->b_flags & XBF_STALE) { - ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); - bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES; - } - - trace_xfs_buf_find(bp, flags, _RET_IP_); - XFS_STATS_INC(xb_get_locked); - return bp; -} - -/* - * Assembles a buffer covering the specified range. - * Storage in memory for all portions of the buffer will be allocated, - * although backing storage may not be. - */ -xfs_buf_t * -xfs_buf_get( - xfs_buftarg_t *target,/* target for buffer */ - xfs_off_t ioff, /* starting offset of range */ - size_t isize, /* length of range */ - xfs_buf_flags_t flags) -{ - xfs_buf_t *bp, *new_bp; - int error = 0; - - new_bp = xfs_buf_allocate(flags); - if (unlikely(!new_bp)) - return NULL; - - bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); - if (bp == new_bp) { - error = xfs_buf_allocate_memory(bp, flags); - if (error) - goto no_buffer; - } else { - xfs_buf_deallocate(new_bp); - if (unlikely(bp == NULL)) - return NULL; - } - - if (!(bp->b_flags & XBF_MAPPED)) { - error = _xfs_buf_map_pages(bp, flags); - if (unlikely(error)) { - xfs_warn(target->bt_mount, - "%s: failed to map pages\n", __func__); - goto no_buffer; - } - } - - XFS_STATS_INC(xb_get); - - /* - * Always fill in the block number now, the mapped cases can do - * their own overlay of this later. - */ - bp->b_bn = ioff; - bp->b_count_desired = bp->b_buffer_length; - - trace_xfs_buf_get(bp, flags, _RET_IP_); - return bp; - - no_buffer: - if (flags & (XBF_LOCK | XBF_TRYLOCK)) - xfs_buf_unlock(bp); - xfs_buf_rele(bp); - return NULL; -} - -STATIC int -_xfs_buf_read( - xfs_buf_t *bp, - xfs_buf_flags_t flags) -{ - int status; - - ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); - ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); - - bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD); - bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); - - status = xfs_buf_iorequest(bp); - if (status || bp->b_error || (flags & XBF_ASYNC)) - return status; - return xfs_buf_iowait(bp); -} - -xfs_buf_t * -xfs_buf_read( - xfs_buftarg_t *target, - xfs_off_t ioff, - size_t isize, - xfs_buf_flags_t flags) -{ - xfs_buf_t *bp; - - flags |= XBF_READ; - - bp = xfs_buf_get(target, ioff, isize, flags); - if (bp) { - trace_xfs_buf_read(bp, flags, _RET_IP_); - - if (!XFS_BUF_ISDONE(bp)) { - XFS_STATS_INC(xb_get_read); - _xfs_buf_read(bp, flags); - } else if (flags & XBF_ASYNC) { - /* - * Read ahead call which is already satisfied, - * drop the buffer - */ - goto no_buffer; - } else { - /* We do not want read in the flags */ - bp->b_flags &= ~XBF_READ; - } - } - - return bp; - - no_buffer: - if (flags & (XBF_LOCK | XBF_TRYLOCK)) - xfs_buf_unlock(bp); - xfs_buf_rele(bp); - return NULL; -} - -/* - * If we are not low on memory then do the readahead in a deadlock - * safe manner. - */ -void -xfs_buf_readahead( - xfs_buftarg_t *target, - xfs_off_t ioff, - size_t isize) -{ - if (bdi_read_congested(target->bt_bdi)) - return; - - xfs_buf_read(target, ioff, isize, - XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK); -} - -/* - * Read an uncached buffer from disk. Allocates and returns a locked - * buffer containing the disk contents or nothing. - */ -struct xfs_buf * -xfs_buf_read_uncached( - struct xfs_mount *mp, - struct xfs_buftarg *target, - xfs_daddr_t daddr, - size_t length, - int flags) -{ - xfs_buf_t *bp; - int error; - - bp = xfs_buf_get_uncached(target, length, flags); - if (!bp) - return NULL; - - /* set up the buffer for a read IO */ - XFS_BUF_SET_ADDR(bp, daddr); - XFS_BUF_READ(bp); - - xfsbdstrat(mp, bp); - error = xfs_buf_iowait(bp); - if (error || bp->b_error) { - xfs_buf_relse(bp); - return NULL; - } - return bp; -} - -xfs_buf_t * -xfs_buf_get_empty( - size_t len, - xfs_buftarg_t *target) -{ - xfs_buf_t *bp; - - bp = xfs_buf_allocate(0); - if (bp) - _xfs_buf_initialize(bp, target, 0, len, 0); - return bp; -} - -/* - * Return a buffer allocated as an empty buffer and associated to external - * memory via xfs_buf_associate_memory() back to it's empty state. - */ -void -xfs_buf_set_empty( - struct xfs_buf *bp, - size_t len) -{ - if (bp->b_pages) - _xfs_buf_free_pages(bp); - - bp->b_pages = NULL; - bp->b_page_count = 0; - bp->b_addr = NULL; - bp->b_file_offset = 0; - bp->b_buffer_length = bp->b_count_desired = len; - bp->b_bn = XFS_BUF_DADDR_NULL; - bp->b_flags &= ~XBF_MAPPED; -} - -static inline struct page * -mem_to_page( - void *addr) -{ - if ((!is_vmalloc_addr(addr))) { - return virt_to_page(addr); - } else { - return vmalloc_to_page(addr); - } -} - -int -xfs_buf_associate_memory( - xfs_buf_t *bp, - void *mem, - size_t len) -{ - int rval; - int i = 0; - unsigned long pageaddr; - unsigned long offset; - size_t buflen; - int page_count; - - pageaddr = (unsigned long)mem & PAGE_MASK; - offset = (unsigned long)mem - pageaddr; - buflen = PAGE_ALIGN(len + offset); - page_count = buflen >> PAGE_SHIFT; - - /* Free any previous set of page pointers */ - if (bp->b_pages) - _xfs_buf_free_pages(bp); - - bp->b_pages = NULL; - bp->b_addr = mem; - - rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK); - if (rval) - return rval; - - bp->b_offset = offset; - - for (i = 0; i < bp->b_page_count; i++) { - bp->b_pages[i] = mem_to_page((void *)pageaddr); - pageaddr += PAGE_SIZE; - } - - bp->b_count_desired = len; - bp->b_buffer_length = buflen; - bp->b_flags |= XBF_MAPPED; - - return 0; -} - -xfs_buf_t * -xfs_buf_get_uncached( - struct xfs_buftarg *target, - size_t len, - int flags) -{ - unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; - int error, i; - xfs_buf_t *bp; - - bp = xfs_buf_allocate(0); - if (unlikely(bp == NULL)) - goto fail; - _xfs_buf_initialize(bp, target, 0, len, 0); - - error = _xfs_buf_get_pages(bp, page_count, 0); - if (error) - goto fail_free_buf; - - for (i = 0; i < page_count; i++) { - bp->b_pages[i] = alloc_page(xb_to_gfp(flags)); - if (!bp->b_pages[i]) - goto fail_free_mem; - } - bp->b_flags |= _XBF_PAGES; - - error = _xfs_buf_map_pages(bp, XBF_MAPPED); - if (unlikely(error)) { - xfs_warn(target->bt_mount, - "%s: failed to map pages\n", __func__); - goto fail_free_mem; - } - - trace_xfs_buf_get_uncached(bp, _RET_IP_); - return bp; - - fail_free_mem: - while (--i >= 0) - __free_page(bp->b_pages[i]); - _xfs_buf_free_pages(bp); - fail_free_buf: - xfs_buf_deallocate(bp); - fail: - return NULL; -} - -/* - * Increment reference count on buffer, to hold the buffer concurrently - * with another thread which may release (free) the buffer asynchronously. - * Must hold the buffer already to call this function. - */ -void -xfs_buf_hold( - xfs_buf_t *bp) -{ - trace_xfs_buf_hold(bp, _RET_IP_); - atomic_inc(&bp->b_hold); -} - -/* - * Releases a hold on the specified buffer. If the - * the hold count is 1, calls xfs_buf_free. - */ -void -xfs_buf_rele( - xfs_buf_t *bp) -{ - struct xfs_perag *pag = bp->b_pag; - - trace_xfs_buf_rele(bp, _RET_IP_); - - if (!pag) { - ASSERT(list_empty(&bp->b_lru)); - ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); - if (atomic_dec_and_test(&bp->b_hold)) - xfs_buf_free(bp); - return; - } - - ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); - - ASSERT(atomic_read(&bp->b_hold) > 0); - if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { - if (!(bp->b_flags & XBF_STALE) && - atomic_read(&bp->b_lru_ref)) { - xfs_buf_lru_add(bp); - spin_unlock(&pag->pag_buf_lock); - } else { - xfs_buf_lru_del(bp); - ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); - rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); - spin_unlock(&pag->pag_buf_lock); - xfs_perag_put(pag); - xfs_buf_free(bp); - } - } -} - - -/* - * Lock a buffer object, if it is not already locked. - * - * If we come across a stale, pinned, locked buffer, we know that we are - * being asked to lock a buffer that has been reallocated. Because it is - * pinned, we know that the log has not been pushed to disk and hence it - * will still be locked. Rather than continuing to have trylock attempts - * fail until someone else pushes the log, push it ourselves before - * returning. This means that the xfsaild will not get stuck trying - * to push on stale inode buffers. - */ -int -xfs_buf_trylock( - struct xfs_buf *bp) -{ - int locked; - - locked = down_trylock(&bp->b_sema) == 0; - if (locked) - XB_SET_OWNER(bp); - else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) - xfs_log_force(bp->b_target->bt_mount, 0); - - trace_xfs_buf_trylock(bp, _RET_IP_); - return locked; -} - -/* - * Lock a buffer object. - * - * If we come across a stale, pinned, locked buffer, we know that we - * are being asked to lock a buffer that has been reallocated. Because - * it is pinned, we know that the log has not been pushed to disk and - * hence it will still be locked. Rather than sleeping until someone - * else pushes the log, push it ourselves before trying to get the lock. - */ -void -xfs_buf_lock( - struct xfs_buf *bp) -{ - trace_xfs_buf_lock(bp, _RET_IP_); - - if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) - xfs_log_force(bp->b_target->bt_mount, 0); - down(&bp->b_sema); - XB_SET_OWNER(bp); - - trace_xfs_buf_lock_done(bp, _RET_IP_); -} - -/* - * Releases the lock on the buffer object. - * If the buffer is marked delwri but is not queued, do so before we - * unlock the buffer as we need to set flags correctly. We also need to - * take a reference for the delwri queue because the unlocker is going to - * drop their's and they don't know we just queued it. - */ -void -xfs_buf_unlock( - struct xfs_buf *bp) -{ - if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) { - atomic_inc(&bp->b_hold); - bp->b_flags |= XBF_ASYNC; - xfs_buf_delwri_queue(bp, 0); - } - - XB_CLEAR_OWNER(bp); - up(&bp->b_sema); - - trace_xfs_buf_unlock(bp, _RET_IP_); -} - -STATIC void -xfs_buf_wait_unpin( - xfs_buf_t *bp) -{ - DECLARE_WAITQUEUE (wait, current); - - if (atomic_read(&bp->b_pin_count) == 0) - return; - - add_wait_queue(&bp->b_waiters, &wait); - for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (atomic_read(&bp->b_pin_count) == 0) - break; - io_schedule(); - } - remove_wait_queue(&bp->b_waiters, &wait); - set_current_state(TASK_RUNNING); -} - -/* - * Buffer Utility Routines - */ - -STATIC void -xfs_buf_iodone_work( - struct work_struct *work) -{ - xfs_buf_t *bp = - container_of(work, xfs_buf_t, b_iodone_work); - - if (bp->b_iodone) - (*(bp->b_iodone))(bp); - else if (bp->b_flags & XBF_ASYNC) - xfs_buf_relse(bp); -} - -void -xfs_buf_ioend( - xfs_buf_t *bp, - int schedule) -{ - trace_xfs_buf_iodone(bp, _RET_IP_); - - bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); - if (bp->b_error == 0) - bp->b_flags |= XBF_DONE; - - if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) { - if (schedule) { - INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work); - queue_work(xfslogd_workqueue, &bp->b_iodone_work); - } else { - xfs_buf_iodone_work(&bp->b_iodone_work); - } - } else { - complete(&bp->b_iowait); - } -} - -void -xfs_buf_ioerror( - xfs_buf_t *bp, - int error) -{ - ASSERT(error >= 0 && error <= 0xffff); - bp->b_error = (unsigned short)error; - trace_xfs_buf_ioerror(bp, error, _RET_IP_); -} - -int -xfs_bwrite( - struct xfs_mount *mp, - struct xfs_buf *bp) -{ - int error; - - bp->b_flags |= XBF_WRITE; - bp->b_flags &= ~(XBF_ASYNC | XBF_READ); - - xfs_buf_delwri_dequeue(bp); - xfs_bdstrat_cb(bp); - - error = xfs_buf_iowait(bp); - if (error) - xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); - xfs_buf_relse(bp); - return error; -} - -void -xfs_bdwrite( - void *mp, - struct xfs_buf *bp) -{ - trace_xfs_buf_bdwrite(bp, _RET_IP_); - - bp->b_flags &= ~XBF_READ; - bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); - - xfs_buf_delwri_queue(bp, 1); -} - -/* - * Called when we want to stop a buffer from getting written or read. - * We attach the EIO error, muck with its flags, and call xfs_buf_ioend - * so that the proper iodone callbacks get called. - */ -STATIC int -xfs_bioerror( - xfs_buf_t *bp) -{ -#ifdef XFSERRORDEBUG - ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone); -#endif - - /* - * No need to wait until the buffer is unpinned, we aren't flushing it. - */ - xfs_buf_ioerror(bp, EIO); - - /* - * We're calling xfs_buf_ioend, so delete XBF_DONE flag. - */ - XFS_BUF_UNREAD(bp); - XFS_BUF_UNDELAYWRITE(bp); - XFS_BUF_UNDONE(bp); - XFS_BUF_STALE(bp); - - xfs_buf_ioend(bp, 0); - - return EIO; -} - -/* - * Same as xfs_bioerror, except that we are releasing the buffer - * here ourselves, and avoiding the xfs_buf_ioend call. - * This is meant for userdata errors; metadata bufs come with - * iodone functions attached, so that we can track down errors. - */ -STATIC int -xfs_bioerror_relse( - struct xfs_buf *bp) -{ - int64_t fl = bp->b_flags; - /* - * No need to wait until the buffer is unpinned. - * We aren't flushing it. - * - * chunkhold expects B_DONE to be set, whether - * we actually finish the I/O or not. We don't want to - * change that interface. - */ - XFS_BUF_UNREAD(bp); - XFS_BUF_UNDELAYWRITE(bp); - XFS_BUF_DONE(bp); - XFS_BUF_STALE(bp); - bp->b_iodone = NULL; - if (!(fl & XBF_ASYNC)) { - /* - * Mark b_error and B_ERROR _both_. - * Lot's of chunkcache code assumes that. - * There's no reason to mark error for - * ASYNC buffers. - */ - xfs_buf_ioerror(bp, EIO); - XFS_BUF_FINISH_IOWAIT(bp); - } else { - xfs_buf_relse(bp); - } - - return EIO; -} - - -/* - * All xfs metadata buffers except log state machine buffers - * get this attached as their b_bdstrat callback function. - * This is so that we can catch a buffer - * after prematurely unpinning it to forcibly shutdown the filesystem. - */ -int -xfs_bdstrat_cb( - struct xfs_buf *bp) -{ - if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { - trace_xfs_bdstrat_shut(bp, _RET_IP_); - /* - * Metadata write that didn't get logged but - * written delayed anyway. These aren't associated - * with a transaction, and can be ignored. - */ - if (!bp->b_iodone && !XFS_BUF_ISREAD(bp)) - return xfs_bioerror_relse(bp); - else - return xfs_bioerror(bp); - } - - xfs_buf_iorequest(bp); - return 0; -} - -/* - * Wrapper around bdstrat so that we can stop data from going to disk in case - * we are shutting down the filesystem. Typically user data goes thru this - * path; one of the exceptions is the superblock. - */ -void -xfsbdstrat( - struct xfs_mount *mp, - struct xfs_buf *bp) -{ - if (XFS_FORCED_SHUTDOWN(mp)) { - trace_xfs_bdstrat_shut(bp, _RET_IP_); - xfs_bioerror_relse(bp); - return; - } - - xfs_buf_iorequest(bp); -} - -STATIC void -_xfs_buf_ioend( - xfs_buf_t *bp, - int schedule) -{ - if (atomic_dec_and_test(&bp->b_io_remaining) == 1) - xfs_buf_ioend(bp, schedule); -} - -STATIC void -xfs_buf_bio_end_io( - struct bio *bio, - int error) -{ - xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; - - xfs_buf_ioerror(bp, -error); - - if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) - invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); - - _xfs_buf_ioend(bp, 1); - bio_put(bio); -} - -STATIC void -_xfs_buf_ioapply( - xfs_buf_t *bp) -{ - int rw, map_i, total_nr_pages, nr_pages; - struct bio *bio; - int offset = bp->b_offset; - int size = bp->b_count_desired; - sector_t sector = bp->b_bn; - - total_nr_pages = bp->b_page_count; - map_i = 0; - - if (bp->b_flags & XBF_WRITE) { - if (bp->b_flags & XBF_SYNCIO) - rw = WRITE_SYNC; - else - rw = WRITE; - if (bp->b_flags & XBF_FUA) - rw |= REQ_FUA; - if (bp->b_flags & XBF_FLUSH) - rw |= REQ_FLUSH; - } else if (bp->b_flags & XBF_READ_AHEAD) { - rw = READA; - } else { - rw = READ; - } - - /* we only use the buffer cache for meta-data */ - rw |= REQ_META; - -next_chunk: - atomic_inc(&bp->b_io_remaining); - nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); - if (nr_pages > total_nr_pages) - nr_pages = total_nr_pages; - - bio = bio_alloc(GFP_NOIO, nr_pages); - bio->bi_bdev = bp->b_target->bt_bdev; - bio->bi_sector = sector; - bio->bi_end_io = xfs_buf_bio_end_io; - bio->bi_private = bp; - - - for (; size && nr_pages; nr_pages--, map_i++) { - int rbytes, nbytes = PAGE_SIZE - offset; - - if (nbytes > size) - nbytes = size; - - rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset); - if (rbytes < nbytes) - break; - - offset = 0; - sector += nbytes >> BBSHIFT; - size -= nbytes; - total_nr_pages--; - } - - if (likely(bio->bi_size)) { - if (xfs_buf_is_vmapped(bp)) { - flush_kernel_vmap_range(bp->b_addr, - xfs_buf_vmap_len(bp)); - } - submit_bio(rw, bio); - if (size) - goto next_chunk; - } else { - xfs_buf_ioerror(bp, EIO); - bio_put(bio); - } -} - -int -xfs_buf_iorequest( - xfs_buf_t *bp) -{ - trace_xfs_buf_iorequest(bp, _RET_IP_); - - if (bp->b_flags & XBF_DELWRI) { - xfs_buf_delwri_queue(bp, 1); - return 0; - } - - if (bp->b_flags & XBF_WRITE) { - xfs_buf_wait_unpin(bp); - } - - xfs_buf_hold(bp); - - /* Set the count to 1 initially, this will stop an I/O - * completion callout which happens before we have started - * all the I/O from calling xfs_buf_ioend too early. - */ - atomic_set(&bp->b_io_remaining, 1); - _xfs_buf_ioapply(bp); - _xfs_buf_ioend(bp, 0); - - xfs_buf_rele(bp); - return 0; -} - -/* - * Waits for I/O to complete on the buffer supplied. - * It returns immediately if no I/O is pending. - * It returns the I/O error code, if any, or 0 if there was no error. - */ -int -xfs_buf_iowait( - xfs_buf_t *bp) -{ - trace_xfs_buf_iowait(bp, _RET_IP_); - - wait_for_completion(&bp->b_iowait); - - trace_xfs_buf_iowait_done(bp, _RET_IP_); - return bp->b_error; -} - -xfs_caddr_t -xfs_buf_offset( - xfs_buf_t *bp, - size_t offset) -{ - struct page *page; - - if (bp->b_flags & XBF_MAPPED) - return bp->b_addr + offset; - - offset += bp->b_offset; - page = bp->b_pages[offset >> PAGE_SHIFT]; - return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1)); -} - -/* - * Move data into or out of a buffer. - */ -void -xfs_buf_iomove( - xfs_buf_t *bp, /* buffer to process */ - size_t boff, /* starting buffer offset */ - size_t bsize, /* length to copy */ - void *data, /* data address */ - xfs_buf_rw_t mode) /* read/write/zero flag */ -{ - size_t bend, cpoff, csize; - struct page *page; - - bend = boff + bsize; - while (boff < bend) { - page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; - cpoff = xfs_buf_poff(boff + bp->b_offset); - csize = min_t(size_t, - PAGE_SIZE-cpoff, bp->b_count_desired-boff); - - ASSERT(((csize + cpoff) <= PAGE_SIZE)); - - switch (mode) { - case XBRW_ZERO: - memset(page_address(page) + cpoff, 0, csize); - break; - case XBRW_READ: - memcpy(data, page_address(page) + cpoff, csize); - break; - case XBRW_WRITE: - memcpy(page_address(page) + cpoff, data, csize); - } - - boff += csize; - data += csize; - } -} - -/* - * Handling of buffer targets (buftargs). - */ - -/* - * Wait for any bufs with callbacks that have been submitted but have not yet - * returned. These buffers will have an elevated hold count, so wait on those - * while freeing all the buffers only held by the LRU. - */ -void -xfs_wait_buftarg( - struct xfs_buftarg *btp) -{ - struct xfs_buf *bp; - -restart: - spin_lock(&btp->bt_lru_lock); - while (!list_empty(&btp->bt_lru)) { - bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); - if (atomic_read(&bp->b_hold) > 1) { - spin_unlock(&btp->bt_lru_lock); - delay(100); - goto restart; - } - /* - * clear the LRU reference count so the bufer doesn't get - * ignored in xfs_buf_rele(). - */ - atomic_set(&bp->b_lru_ref, 0); - spin_unlock(&btp->bt_lru_lock); - xfs_buf_rele(bp); - spin_lock(&btp->bt_lru_lock); - } - spin_unlock(&btp->bt_lru_lock); -} - -int -xfs_buftarg_shrink( - struct shrinker *shrink, - struct shrink_control *sc) -{ - struct xfs_buftarg *btp = container_of(shrink, - struct xfs_buftarg, bt_shrinker); - struct xfs_buf *bp; - int nr_to_scan = sc->nr_to_scan; - LIST_HEAD(dispose); - - if (!nr_to_scan) - return btp->bt_lru_nr; - - spin_lock(&btp->bt_lru_lock); - while (!list_empty(&btp->bt_lru)) { - if (nr_to_scan-- <= 0) - break; - - bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); - - /* - * Decrement the b_lru_ref count unless the value is already - * zero. If the value is already zero, we need to reclaim the - * buffer, otherwise it gets another trip through the LRU. - */ - if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { - list_move_tail(&bp->b_lru, &btp->bt_lru); - continue; - } - - /* - * remove the buffer from the LRU now to avoid needing another - * lock round trip inside xfs_buf_rele(). - */ - list_move(&bp->b_lru, &dispose); - btp->bt_lru_nr--; - } - spin_unlock(&btp->bt_lru_lock); - - while (!list_empty(&dispose)) { - bp = list_first_entry(&dispose, struct xfs_buf, b_lru); - list_del_init(&bp->b_lru); - xfs_buf_rele(bp); - } - - return btp->bt_lru_nr; -} - -void -xfs_free_buftarg( - struct xfs_mount *mp, - struct xfs_buftarg *btp) -{ - unregister_shrinker(&btp->bt_shrinker); - - xfs_flush_buftarg(btp, 1); - if (mp->m_flags & XFS_MOUNT_BARRIER) - xfs_blkdev_issue_flush(btp); - - kthread_stop(btp->bt_task); - kmem_free(btp); -} - -STATIC int -xfs_setsize_buftarg_flags( - xfs_buftarg_t *btp, - unsigned int blocksize, - unsigned int sectorsize, - int verbose) -{ - btp->bt_bsize = blocksize; - btp->bt_sshift = ffs(sectorsize) - 1; - btp->bt_smask = sectorsize - 1; - - if (set_blocksize(btp->bt_bdev, sectorsize)) { - xfs_warn(btp->bt_mount, - "Cannot set_blocksize to %u on device %s\n", - sectorsize, xfs_buf_target_name(btp)); - return EINVAL; - } - - return 0; -} - -/* - * When allocating the initial buffer target we have not yet - * read in the superblock, so don't know what sized sectors - * are being used is at this early stage. Play safe. - */ -STATIC int -xfs_setsize_buftarg_early( - xfs_buftarg_t *btp, - struct block_device *bdev) -{ - return xfs_setsize_buftarg_flags(btp, - PAGE_SIZE, bdev_logical_block_size(bdev), 0); -} - -int -xfs_setsize_buftarg( - xfs_buftarg_t *btp, - unsigned int blocksize, - unsigned int sectorsize) -{ - return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1); -} - -STATIC int -xfs_alloc_delwrite_queue( - xfs_buftarg_t *btp, - const char *fsname) -{ - INIT_LIST_HEAD(&btp->bt_delwrite_queue); - spin_lock_init(&btp->bt_delwrite_lock); - btp->bt_flags = 0; - btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); - if (IS_ERR(btp->bt_task)) - return PTR_ERR(btp->bt_task); - return 0; -} - -xfs_buftarg_t * -xfs_alloc_buftarg( - struct xfs_mount *mp, - struct block_device *bdev, - int external, - const char *fsname) -{ - xfs_buftarg_t *btp; - - btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); - - btp->bt_mount = mp; - btp->bt_dev = bdev->bd_dev; - btp->bt_bdev = bdev; - btp->bt_bdi = blk_get_backing_dev_info(bdev); - if (!btp->bt_bdi) - goto error; - - INIT_LIST_HEAD(&btp->bt_lru); - spin_lock_init(&btp->bt_lru_lock); - if (xfs_setsize_buftarg_early(btp, bdev)) - goto error; - if (xfs_alloc_delwrite_queue(btp, fsname)) - goto error; - btp->bt_shrinker.shrink = xfs_buftarg_shrink; - btp->bt_shrinker.seeks = DEFAULT_SEEKS; - register_shrinker(&btp->bt_shrinker); - return btp; - -error: - kmem_free(btp); - return NULL; -} - - -/* - * Delayed write buffer handling - */ -STATIC void -xfs_buf_delwri_queue( - xfs_buf_t *bp, - int unlock) -{ - struct list_head *dwq = &bp->b_target->bt_delwrite_queue; - spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; - - trace_xfs_buf_delwri_queue(bp, _RET_IP_); - - ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC)); - - spin_lock(dwlk); - /* If already in the queue, dequeue and place at tail */ - if (!list_empty(&bp->b_list)) { - ASSERT(bp->b_flags & _XBF_DELWRI_Q); - if (unlock) - atomic_dec(&bp->b_hold); - list_del(&bp->b_list); - } - - if (list_empty(dwq)) { - /* start xfsbufd as it is about to have something to do */ - wake_up_process(bp->b_target->bt_task); - } - - bp->b_flags |= _XBF_DELWRI_Q; - list_add_tail(&bp->b_list, dwq); - bp->b_queuetime = jiffies; - spin_unlock(dwlk); - - if (unlock) - xfs_buf_unlock(bp); -} - -void -xfs_buf_delwri_dequeue( - xfs_buf_t *bp) -{ - spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; - int dequeued = 0; - - spin_lock(dwlk); - if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) { - ASSERT(bp->b_flags & _XBF_DELWRI_Q); - list_del_init(&bp->b_list); - dequeued = 1; - } - bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); - spin_unlock(dwlk); - - if (dequeued) - xfs_buf_rele(bp); - - trace_xfs_buf_delwri_dequeue(bp, _RET_IP_); -} - -/* - * If a delwri buffer needs to be pushed before it has aged out, then promote - * it to the head of the delwri queue so that it will be flushed on the next - * xfsbufd run. We do this by resetting the queuetime of the buffer to be older - * than the age currently needed to flush the buffer. Hence the next time the - * xfsbufd sees it is guaranteed to be considered old enough to flush. - */ -void -xfs_buf_delwri_promote( - struct xfs_buf *bp) -{ - struct xfs_buftarg *btp = bp->b_target; - long age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1; - - ASSERT(bp->b_flags & XBF_DELWRI); - ASSERT(bp->b_flags & _XBF_DELWRI_Q); - - /* - * Check the buffer age before locking the delayed write queue as we - * don't need to promote buffers that are already past the flush age. - */ - if (bp->b_queuetime < jiffies - age) - return; - bp->b_queuetime = jiffies - age; - spin_lock(&btp->bt_delwrite_lock); - list_move(&bp->b_list, &btp->bt_delwrite_queue); - spin_unlock(&btp->bt_delwrite_lock); -} - -STATIC void -xfs_buf_runall_queues( - struct workqueue_struct *queue) -{ - flush_workqueue(queue); -} - -/* - * Move as many buffers as specified to the supplied list - * idicating if we skipped any buffers to prevent deadlocks. - */ -STATIC int -xfs_buf_delwri_split( - xfs_buftarg_t *target, - struct list_head *list, - unsigned long age) -{ - xfs_buf_t *bp, *n; - struct list_head *dwq = &target->bt_delwrite_queue; - spinlock_t *dwlk = &target->bt_delwrite_lock; - int skipped = 0; - int force; - - force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags); - INIT_LIST_HEAD(list); - spin_lock(dwlk); - list_for_each_entry_safe(bp, n, dwq, b_list) { - ASSERT(bp->b_flags & XBF_DELWRI); - - if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) { - if (!force && - time_before(jiffies, bp->b_queuetime + age)) { - xfs_buf_unlock(bp); - break; - } - - bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q); - bp->b_flags |= XBF_WRITE; - list_move_tail(&bp->b_list, list); - trace_xfs_buf_delwri_split(bp, _RET_IP_); - } else - skipped++; - } - spin_unlock(dwlk); - - return skipped; - -} - -/* - * Compare function is more complex than it needs to be because - * the return value is only 32 bits and we are doing comparisons - * on 64 bit values - */ -static int -xfs_buf_cmp( - void *priv, - struct list_head *a, - struct list_head *b) -{ - struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list); - struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list); - xfs_daddr_t diff; - - diff = ap->b_bn - bp->b_bn; - if (diff < 0) - return -1; - if (diff > 0) - return 1; - return 0; -} - -STATIC int -xfsbufd( - void *data) -{ - xfs_buftarg_t *target = (xfs_buftarg_t *)data; - - current->flags |= PF_MEMALLOC; - - set_freezable(); - - do { - long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); - long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); - struct list_head tmp; - struct blk_plug plug; - - if (unlikely(freezing(current))) { - set_bit(XBT_FORCE_SLEEP, &target->bt_flags); - refrigerator(); - } else { - clear_bit(XBT_FORCE_SLEEP, &target->bt_flags); - } - - /* sleep for a long time if there is nothing to do. */ - if (list_empty(&target->bt_delwrite_queue)) - tout = MAX_SCHEDULE_TIMEOUT; - schedule_timeout_interruptible(tout); - - xfs_buf_delwri_split(target, &tmp, age); - list_sort(NULL, &tmp, xfs_buf_cmp); - - blk_start_plug(&plug); - while (!list_empty(&tmp)) { - struct xfs_buf *bp; - bp = list_first_entry(&tmp, struct xfs_buf, b_list); - list_del_init(&bp->b_list); - xfs_bdstrat_cb(bp); - } - blk_finish_plug(&plug); - } while (!kthread_should_stop()); - - return 0; -} - -/* - * Go through all incore buffers, and release buffers if they belong to - * the given device. This is used in filesystem error handling to - * preserve the consistency of its metadata. - */ -int -xfs_flush_buftarg( - xfs_buftarg_t *target, - int wait) -{ - xfs_buf_t *bp; - int pincount = 0; - LIST_HEAD(tmp_list); - LIST_HEAD(wait_list); - struct blk_plug plug; - - xfs_buf_runall_queues(xfsconvertd_workqueue); - xfs_buf_runall_queues(xfsdatad_workqueue); - xfs_buf_runall_queues(xfslogd_workqueue); - - set_bit(XBT_FORCE_FLUSH, &target->bt_flags); - pincount = xfs_buf_delwri_split(target, &tmp_list, 0); - - /* - * Dropped the delayed write list lock, now walk the temporary list. - * All I/O is issued async and then if we need to wait for completion - * we do that after issuing all the IO. - */ - list_sort(NULL, &tmp_list, xfs_buf_cmp); - - blk_start_plug(&plug); - while (!list_empty(&tmp_list)) { - bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); - ASSERT(target == bp->b_target); - list_del_init(&bp->b_list); - if (wait) { - bp->b_flags &= ~XBF_ASYNC; - list_add(&bp->b_list, &wait_list); - } - xfs_bdstrat_cb(bp); - } - blk_finish_plug(&plug); - - if (wait) { - /* Wait for IO to complete. */ - while (!list_empty(&wait_list)) { - bp = list_first_entry(&wait_list, struct xfs_buf, b_list); - - list_del_init(&bp->b_list); - xfs_buf_iowait(bp); - xfs_buf_relse(bp); - } - } - - return pincount; -} - -int __init -xfs_buf_init(void) -{ - xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", - KM_ZONE_HWALIGN, NULL); - if (!xfs_buf_zone) - goto out; - - xfslogd_workqueue = alloc_workqueue("xfslogd", - WQ_MEM_RECLAIM | WQ_HIGHPRI, 1); - if (!xfslogd_workqueue) - goto out_free_buf_zone; - - xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1); - if (!xfsdatad_workqueue) - goto out_destroy_xfslogd_workqueue; - - xfsconvertd_workqueue = alloc_workqueue("xfsconvertd", - WQ_MEM_RECLAIM, 1); - if (!xfsconvertd_workqueue) - goto out_destroy_xfsdatad_workqueue; - - return 0; - - out_destroy_xfsdatad_workqueue: - destroy_workqueue(xfsdatad_workqueue); - out_destroy_xfslogd_workqueue: - destroy_workqueue(xfslogd_workqueue); - out_free_buf_zone: - kmem_zone_destroy(xfs_buf_zone); - out: - return -ENOMEM; -} - -void -xfs_buf_terminate(void) -{ - destroy_workqueue(xfsconvertd_workqueue); - destroy_workqueue(xfsdatad_workqueue); - destroy_workqueue(xfslogd_workqueue); - kmem_zone_destroy(xfs_buf_zone); -} - -#ifdef CONFIG_KDB_MODULES -struct list_head * -xfs_get_buftarg_list(void) -{ - return &xfs_buftarg_list; -} -#endif diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h deleted file mode 100644 index 620972b..0000000 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ /dev/null @@ -1,326 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_BUF_H__ -#define __XFS_BUF_H__ - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Base types - */ - -#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) - -#define xfs_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE) -#define xfs_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) -#define xfs_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT) -#define xfs_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK) - -typedef enum { - XBRW_READ = 1, /* transfer into target memory */ - XBRW_WRITE = 2, /* transfer from target memory */ - XBRW_ZERO = 3, /* Zero target memory */ -} xfs_buf_rw_t; - -#define XBF_READ (1 << 0) /* buffer intended for reading from device */ -#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ -#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ -#define XBF_MAPPED (1 << 3) /* buffer mapped (b_addr valid) */ -#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ -#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ -#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ -#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ - -/* I/O hints for the BIO layer */ -#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ -#define XBF_FUA (1 << 11)/* force cache write through mode */ -#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ - -/* flags used only as arguments to access routines */ -#define XBF_LOCK (1 << 15)/* lock requested */ -#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ -#define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */ - -/* flags used only internally */ -#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ -#define _XBF_KMEM (1 << 21)/* backed by heap memory */ -#define _XBF_DELWRI_Q (1 << 22)/* buffer on delwri queue */ - -typedef unsigned int xfs_buf_flags_t; - -#define XFS_BUF_FLAGS \ - { XBF_READ, "READ" }, \ - { XBF_WRITE, "WRITE" }, \ - { XBF_READ_AHEAD, "READ_AHEAD" }, \ - { XBF_MAPPED, "MAPPED" }, \ - { XBF_ASYNC, "ASYNC" }, \ - { XBF_DONE, "DONE" }, \ - { XBF_DELWRI, "DELWRI" }, \ - { XBF_STALE, "STALE" }, \ - { XBF_SYNCIO, "SYNCIO" }, \ - { XBF_FUA, "FUA" }, \ - { XBF_FLUSH, "FLUSH" }, \ - { XBF_LOCK, "LOCK" }, /* should never be set */\ - { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ - { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ - { _XBF_PAGES, "PAGES" }, \ - { _XBF_KMEM, "KMEM" }, \ - { _XBF_DELWRI_Q, "DELWRI_Q" } - -typedef enum { - XBT_FORCE_SLEEP = 0, - XBT_FORCE_FLUSH = 1, -} xfs_buftarg_flags_t; - -typedef struct xfs_buftarg { - dev_t bt_dev; - struct block_device *bt_bdev; - struct backing_dev_info *bt_bdi; - struct xfs_mount *bt_mount; - unsigned int bt_bsize; - unsigned int bt_sshift; - size_t bt_smask; - - /* per device delwri queue */ - struct task_struct *bt_task; - struct list_head bt_delwrite_queue; - spinlock_t bt_delwrite_lock; - unsigned long bt_flags; - - /* LRU control structures */ - struct shrinker bt_shrinker; - struct list_head bt_lru; - spinlock_t bt_lru_lock; - unsigned int bt_lru_nr; -} xfs_buftarg_t; - -struct xfs_buf; -typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); - -#define XB_PAGES 2 - -typedef struct xfs_buf { - /* - * first cacheline holds all the fields needed for an uncontended cache - * hit to be fully processed. The semaphore straddles the cacheline - * boundary, but the counter and lock sits on the first cacheline, - * which is the only bit that is touched if we hit the semaphore - * fast-path on locking. - */ - struct rb_node b_rbnode; /* rbtree node */ - xfs_off_t b_file_offset; /* offset in file */ - size_t b_buffer_length;/* size of buffer in bytes */ - atomic_t b_hold; /* reference count */ - atomic_t b_lru_ref; /* lru reclaim ref count */ - xfs_buf_flags_t b_flags; /* status flags */ - struct semaphore b_sema; /* semaphore for lockables */ - - struct list_head b_lru; /* lru list */ - wait_queue_head_t b_waiters; /* unpin waiters */ - struct list_head b_list; - struct xfs_perag *b_pag; /* contains rbtree root */ - xfs_buftarg_t *b_target; /* buffer target (device) */ - xfs_daddr_t b_bn; /* block number for I/O */ - size_t b_count_desired;/* desired transfer size */ - void *b_addr; /* virtual address of buffer */ - struct work_struct b_iodone_work; - xfs_buf_iodone_t b_iodone; /* I/O completion function */ - struct completion b_iowait; /* queue for I/O waiters */ - void *b_fspriv; - struct xfs_trans *b_transp; - struct page **b_pages; /* array of page pointers */ - struct page *b_page_array[XB_PAGES]; /* inline pages */ - unsigned long b_queuetime; /* time buffer was queued */ - atomic_t b_pin_count; /* pin count */ - atomic_t b_io_remaining; /* #outstanding I/O requests */ - unsigned int b_page_count; /* size of page array */ - unsigned int b_offset; /* page offset in first page */ - unsigned short b_error; /* error code on I/O */ -#ifdef XFS_BUF_LOCK_TRACKING - int b_last_holder; -#endif -} xfs_buf_t; - - -/* Finding and Reading Buffers */ -extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t, - xfs_buf_flags_t, xfs_buf_t *); -#define xfs_incore(buftarg,blkno,len,lockit) \ - _xfs_buf_find(buftarg, blkno ,len, lockit, NULL) - -extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t, - xfs_buf_flags_t); -extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t, - xfs_buf_flags_t); - -extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); -extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len); -extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); -extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); -extern void xfs_buf_hold(xfs_buf_t *); -extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t); -struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp, - struct xfs_buftarg *target, - xfs_daddr_t daddr, size_t length, int flags); - -/* Releasing Buffers */ -extern void xfs_buf_free(xfs_buf_t *); -extern void xfs_buf_rele(xfs_buf_t *); - -/* Locking and Unlocking Buffers */ -extern int xfs_buf_trylock(xfs_buf_t *); -extern void xfs_buf_lock(xfs_buf_t *); -extern void xfs_buf_unlock(xfs_buf_t *); -#define xfs_buf_islocked(bp) \ - ((bp)->b_sema.count <= 0) - -/* Buffer Read and Write Routines */ -extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); -extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); - -extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); -extern int xfs_bdstrat_cb(struct xfs_buf *); - -extern void xfs_buf_ioend(xfs_buf_t *, int); -extern void xfs_buf_ioerror(xfs_buf_t *, int); -extern int xfs_buf_iorequest(xfs_buf_t *); -extern int xfs_buf_iowait(xfs_buf_t *); -extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, - xfs_buf_rw_t); -#define xfs_buf_zero(bp, off, len) \ - xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) - -static inline int xfs_buf_geterror(xfs_buf_t *bp) -{ - return bp ? bp->b_error : ENOMEM; -} - -/* Buffer Utility Routines */ -extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); - -/* Delayed Write Buffer Routines */ -extern void xfs_buf_delwri_dequeue(xfs_buf_t *); -extern void xfs_buf_delwri_promote(xfs_buf_t *); - -/* Buffer Daemon Setup Routines */ -extern int xfs_buf_init(void); -extern void xfs_buf_terminate(void); - -static inline const char * -xfs_buf_target_name(struct xfs_buftarg *target) -{ - static char __b[BDEVNAME_SIZE]; - - return bdevname(target->bt_bdev, __b); -} - - -#define XFS_BUF_ZEROFLAGS(bp) \ - ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \ - XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) - -void xfs_buf_stale(struct xfs_buf *bp); -#define XFS_BUF_STALE(bp) xfs_buf_stale(bp); -#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) -#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) -#define XFS_BUF_SUPER_STALE(bp) do { \ - XFS_BUF_STALE(bp); \ - xfs_buf_delwri_dequeue(bp); \ - XFS_BUF_DONE(bp); \ - } while (0) - -#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) -#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) -#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) - -#define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE) -#define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE) -#define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE) - -#define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC) -#define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC) -#define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC) - -#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) -#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) -#define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ) - -#define XFS_BUF_WRITE(bp) ((bp)->b_flags |= XBF_WRITE) -#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) -#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) - -#define XFS_BUF_ADDR(bp) ((bp)->b_bn) -#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno)) -#define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset) -#define XFS_BUF_SET_OFFSET(bp, off) ((bp)->b_file_offset = (off)) -#define XFS_BUF_COUNT(bp) ((bp)->b_count_desired) -#define XFS_BUF_SET_COUNT(bp, cnt) ((bp)->b_count_desired = (cnt)) -#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) -#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) - -static inline void -xfs_buf_set_ref( - struct xfs_buf *bp, - int lru_ref) -{ - atomic_set(&bp->b_lru_ref, lru_ref); -} -#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref) -#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) - -static inline int xfs_buf_ispinned(struct xfs_buf *bp) -{ - return atomic_read(&bp->b_pin_count); -} - -#define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait); - -static inline void xfs_buf_relse(xfs_buf_t *bp) -{ - xfs_buf_unlock(bp); - xfs_buf_rele(bp); -} - -/* - * Handling of buftargs. - */ -extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *, - struct block_device *, int, const char *); -extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); -extern void xfs_wait_buftarg(xfs_buftarg_t *); -extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); -extern int xfs_flush_buftarg(xfs_buftarg_t *, int); - -#ifdef CONFIG_KDB_MODULES -extern struct list_head *xfs_get_buftarg_list(void); -#endif - -#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) -#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) - -#define xfs_binval(buftarg) xfs_flush_buftarg(buftarg, 1) -#define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1) - -#endif /* __XFS_BUF_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c deleted file mode 100644 index 244e797..0000000 --- a/fs/xfs/linux-2.6/xfs_discard.c +++ /dev/null @@ -1,222 +0,0 @@ -/* - * Copyright (C) 2010 Red Hat, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_sb.h" -#include "xfs_inum.h" -#include "xfs_log.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_quota.h" -#include "xfs_trans.h" -#include "xfs_alloc_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_btree.h" -#include "xfs_inode.h" -#include "xfs_alloc.h" -#include "xfs_error.h" -#include "xfs_discard.h" -#include "xfs_trace.h" - -STATIC int -xfs_trim_extents( - struct xfs_mount *mp, - xfs_agnumber_t agno, - xfs_fsblock_t start, - xfs_fsblock_t len, - xfs_fsblock_t minlen, - __uint64_t *blocks_trimmed) -{ - struct block_device *bdev = mp->m_ddev_targp->bt_bdev; - struct xfs_btree_cur *cur; - struct xfs_buf *agbp; - struct xfs_perag *pag; - int error; - int i; - - pag = xfs_perag_get(mp, agno); - - error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); - if (error || !agbp) - goto out_put_perag; - - cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT); - - /* - * Force out the log. This means any transactions that might have freed - * space before we took the AGF buffer lock are now on disk, and the - * volatile disk cache is flushed. - */ - xfs_log_force(mp, XFS_LOG_SYNC); - - /* - * Look up the longest btree in the AGF and start with it. - */ - error = xfs_alloc_lookup_le(cur, 0, - XFS_BUF_TO_AGF(agbp)->agf_longest, &i); - if (error) - goto out_del_cursor; - - /* - * Loop until we are done with all extents that are large - * enough to be worth discarding. - */ - while (i) { - xfs_agblock_t fbno; - xfs_extlen_t flen; - - error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); - if (error) - goto out_del_cursor; - XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); - ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest); - - /* - * Too small? Give up. - */ - if (flen < minlen) { - trace_xfs_discard_toosmall(mp, agno, fbno, flen); - goto out_del_cursor; - } - - /* - * If the extent is entirely outside of the range we are - * supposed to discard skip it. Do not bother to trim - * down partially overlapping ranges for now. - */ - if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start || - XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) { - trace_xfs_discard_exclude(mp, agno, fbno, flen); - goto next_extent; - } - - /* - * If any blocks in the range are still busy, skip the - * discard and try again the next time. - */ - if (xfs_alloc_busy_search(mp, agno, fbno, flen)) { - trace_xfs_discard_busy(mp, agno, fbno, flen); - goto next_extent; - } - - trace_xfs_discard_extent(mp, agno, fbno, flen); - error = -blkdev_issue_discard(bdev, - XFS_AGB_TO_DADDR(mp, agno, fbno), - XFS_FSB_TO_BB(mp, flen), - GFP_NOFS, 0); - if (error) - goto out_del_cursor; - *blocks_trimmed += flen; - -next_extent: - error = xfs_btree_decrement(cur, 0, &i); - if (error) - goto out_del_cursor; - } - -out_del_cursor: - xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); - xfs_buf_relse(agbp); -out_put_perag: - xfs_perag_put(pag); - return error; -} - -int -xfs_ioc_trim( - struct xfs_mount *mp, - struct fstrim_range __user *urange) -{ - struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; - unsigned int granularity = q->limits.discard_granularity; - struct fstrim_range range; - xfs_fsblock_t start, len, minlen; - xfs_agnumber_t start_agno, end_agno, agno; - __uint64_t blocks_trimmed = 0; - int error, last_error = 0; - - if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); - if (!blk_queue_discard(q)) - return -XFS_ERROR(EOPNOTSUPP); - if (copy_from_user(&range, urange, sizeof(range))) - return -XFS_ERROR(EFAULT); - - /* - * Truncating down the len isn't actually quite correct, but using - * XFS_B_TO_FSB would mean we trivially get overflows for values - * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default - * used by the fstrim application. In the end it really doesn't - * matter as trimming blocks is an advisory interface. - */ - start = XFS_B_TO_FSBT(mp, range.start); - len = XFS_B_TO_FSBT(mp, range.len); - minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen)); - - start_agno = XFS_FSB_TO_AGNO(mp, start); - if (start_agno >= mp->m_sb.sb_agcount) - return -XFS_ERROR(EINVAL); - - end_agno = XFS_FSB_TO_AGNO(mp, start + len); - if (end_agno >= mp->m_sb.sb_agcount) - end_agno = mp->m_sb.sb_agcount - 1; - - for (agno = start_agno; agno <= end_agno; agno++) { - error = -xfs_trim_extents(mp, agno, start, len, minlen, - &blocks_trimmed); - if (error) - last_error = error; - } - - if (last_error) - return last_error; - - range.len = XFS_FSB_TO_B(mp, blocks_trimmed); - if (copy_to_user(urange, &range, sizeof(range))) - return -XFS_ERROR(EFAULT); - return 0; -} - -int -xfs_discard_extents( - struct xfs_mount *mp, - struct list_head *list) -{ - struct xfs_busy_extent *busyp; - int error = 0; - - list_for_each_entry(busyp, list, list) { - trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, - busyp->length); - - error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, - XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), - XFS_FSB_TO_BB(mp, busyp->length), - GFP_NOFS, 0); - if (error && error != EOPNOTSUPP) { - xfs_info(mp, - "discard failed for extent [0x%llu,%u], error %d", - (unsigned long long)busyp->bno, - busyp->length, - error); - return error; - } - } - - return 0; -} diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/linux-2.6/xfs_discard.h deleted file mode 100644 index 344879a..0000000 --- a/fs/xfs/linux-2.6/xfs_discard.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef XFS_DISCARD_H -#define XFS_DISCARD_H 1 - -struct fstrim_range; -struct list_head; - -extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); -extern int xfs_discard_extents(struct xfs_mount *, struct list_head *); - -#endif /* XFS_DISCARD_H */ diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c deleted file mode 100644 index 75e5d32..0000000 --- a/fs/xfs/linux-2.6/xfs_export.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright (c) 2004-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_types.h" -#include "xfs_inum.h" -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_mount.h" -#include "xfs_export.h" -#include "xfs_vnodeops.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_inode_item.h" -#include "xfs_trace.h" - -/* - * Note that we only accept fileids which are long enough rather than allow - * the parent generation number to default to zero. XFS considers zero a - * valid generation number not an invalid/wildcard value. - */ -static int xfs_fileid_length(int fileid_type) -{ - switch (fileid_type) { - case FILEID_INO32_GEN: - return 2; - case FILEID_INO32_GEN_PARENT: - return 4; - case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: - return 3; - case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: - return 6; - } - return 255; /* invalid */ -} - -STATIC int -xfs_fs_encode_fh( - struct dentry *dentry, - __u32 *fh, - int *max_len, - int connectable) -{ - struct fid *fid = (struct fid *)fh; - struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fh; - struct inode *inode = dentry->d_inode; - int fileid_type; - int len; - - /* Directories don't need their parent encoded, they have ".." */ - if (S_ISDIR(inode->i_mode) || !connectable) - fileid_type = FILEID_INO32_GEN; - else - fileid_type = FILEID_INO32_GEN_PARENT; - - /* - * If the the filesystem may contain 64bit inode numbers, we need - * to use larger file handles that can represent them. - * - * While we only allocate inodes that do not fit into 32 bits any - * large enough filesystem may contain them, thus the slightly - * confusing looking conditional below. - */ - if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) || - (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES)) - fileid_type |= XFS_FILEID_TYPE_64FLAG; - - /* - * Only encode if there is enough space given. In practice - * this means we can't export a filesystem with 64bit inodes - * over NFSv2 with the subtree_check export option; the other - * seven combinations work. The real answer is "don't use v2". - */ - len = xfs_fileid_length(fileid_type); - if (*max_len < len) { - *max_len = len; - return 255; - } - *max_len = len; - - switch (fileid_type) { - case FILEID_INO32_GEN_PARENT: - spin_lock(&dentry->d_lock); - fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino; - fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation; - spin_unlock(&dentry->d_lock); - /*FALLTHRU*/ - case FILEID_INO32_GEN: - fid->i32.ino = inode->i_ino; - fid->i32.gen = inode->i_generation; - break; - case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: - spin_lock(&dentry->d_lock); - fid64->parent_ino = dentry->d_parent->d_inode->i_ino; - fid64->parent_gen = dentry->d_parent->d_inode->i_generation; - spin_unlock(&dentry->d_lock); - /*FALLTHRU*/ - case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: - fid64->ino = inode->i_ino; - fid64->gen = inode->i_generation; - break; - } - - return fileid_type; -} - -STATIC struct inode * -xfs_nfs_get_inode( - struct super_block *sb, - u64 ino, - u32 generation) - { - xfs_mount_t *mp = XFS_M(sb); - xfs_inode_t *ip; - int error; - - /* - * NFS can sometimes send requests for ino 0. Fail them gracefully. - */ - if (ino == 0) - return ERR_PTR(-ESTALE); - - /* - * The XFS_IGET_UNTRUSTED means that an invalid inode number is just - * fine and not an indication of a corrupted filesystem as clients can - * send invalid file handles and we have to handle it gracefully.. - */ - error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip); - if (error) { - /* - * EINVAL means the inode cluster doesn't exist anymore. - * This implies the filehandle is stale, so we should - * translate it here. - * We don't use ESTALE directly down the chain to not - * confuse applications using bulkstat that expect EINVAL. - */ - if (error == EINVAL || error == ENOENT) - error = ESTALE; - return ERR_PTR(-error); - } - - if (ip->i_d.di_gen != generation) { - IRELE(ip); - return ERR_PTR(-ESTALE); - } - - return VFS_I(ip); -} - -STATIC struct dentry * -xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid, - int fh_len, int fileid_type) -{ - struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; - struct inode *inode = NULL; - - if (fh_len < xfs_fileid_length(fileid_type)) - return NULL; - - switch (fileid_type) { - case FILEID_INO32_GEN_PARENT: - case FILEID_INO32_GEN: - inode = xfs_nfs_get_inode(sb, fid->i32.ino, fid->i32.gen); - break; - case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: - case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: - inode = xfs_nfs_get_inode(sb, fid64->ino, fid64->gen); - break; - } - - return d_obtain_alias(inode); -} - -STATIC struct dentry * -xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid, - int fh_len, int fileid_type) -{ - struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; - struct inode *inode = NULL; - - switch (fileid_type) { - case FILEID_INO32_GEN_PARENT: - inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino, - fid->i32.parent_gen); - break; - case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: - inode = xfs_nfs_get_inode(sb, fid64->parent_ino, - fid64->parent_gen); - break; - } - - return d_obtain_alias(inode); -} - -STATIC struct dentry * -xfs_fs_get_parent( - struct dentry *child) -{ - int error; - struct xfs_inode *cip; - - error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL); - if (unlikely(error)) - return ERR_PTR(-error); - - return d_obtain_alias(VFS_I(cip)); -} - -STATIC int -xfs_fs_nfs_commit_metadata( - struct inode *inode) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - int error = 0; - - xfs_ilock(ip, XFS_ILOCK_SHARED); - if (xfs_ipincount(ip)) { - error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn, - XFS_LOG_SYNC, NULL); - } - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - return error; -} - -const struct export_operations xfs_export_operations = { - .encode_fh = xfs_fs_encode_fh, - .fh_to_dentry = xfs_fs_fh_to_dentry, - .fh_to_parent = xfs_fs_fh_to_parent, - .get_parent = xfs_fs_get_parent, - .commit_metadata = xfs_fs_nfs_commit_metadata, -}; diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h deleted file mode 100644 index 3272b6a..0000000 --- a/fs/xfs/linux-2.6/xfs_export.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_EXPORT_H__ -#define __XFS_EXPORT_H__ - -/* - * Common defines for code related to exporting XFS filesystems over NFS. - * - * The NFS fileid goes out on the wire as an array of - * 32bit unsigned ints in host order. There are 5 possible - * formats. - * - * (1) fileid_type=0x00 - * (no fileid data; handled by the generic code) - * - * (2) fileid_type=0x01 - * inode-num - * generation - * - * (3) fileid_type=0x02 - * inode-num - * generation - * parent-inode-num - * parent-generation - * - * (4) fileid_type=0x81 - * inode-num-lo32 - * inode-num-hi32 - * generation - * - * (5) fileid_type=0x82 - * inode-num-lo32 - * inode-num-hi32 - * generation - * parent-inode-num-lo32 - * parent-inode-num-hi32 - * parent-generation - * - * Note, the NFS filehandle also includes an fsid portion which - * may have an inode number in it. That number is hardcoded to - * 32bits and there is no way for XFS to intercept it. In - * practice this means when exporting an XFS filesystem with 64bit - * inodes you should either export the mountpoint (rather than - * a subdirectory) or use the "fsid" export option. - */ - -struct xfs_fid64 { - u64 ino; - u32 gen; - u64 parent_ino; - u32 parent_gen; -} __attribute__((packed)); - -/* This flag goes on the wire. Don't play with it. */ -#define XFS_FILEID_TYPE_64FLAG 0x80 /* NFS fileid has 64bit inodes */ - -#endif /* __XFS_EXPORT_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c deleted file mode 100644 index 7f7b424..0000000 --- a/fs/xfs/linux-2.6/xfs_file.c +++ /dev/null @@ -1,1096 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_trans.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_inode_item.h" -#include "xfs_bmap.h" -#include "xfs_error.h" -#include "xfs_vnodeops.h" -#include "xfs_da_btree.h" -#include "xfs_ioctl.h" -#include "xfs_trace.h" - -#include -#include - -static const struct vm_operations_struct xfs_file_vm_ops; - -/* - * Locking primitives for read and write IO paths to ensure we consistently use - * and order the inode->i_mutex, ip->i_lock and ip->i_iolock. - */ -static inline void -xfs_rw_ilock( - struct xfs_inode *ip, - int type) -{ - if (type & XFS_IOLOCK_EXCL) - mutex_lock(&VFS_I(ip)->i_mutex); - xfs_ilock(ip, type); -} - -static inline void -xfs_rw_iunlock( - struct xfs_inode *ip, - int type) -{ - xfs_iunlock(ip, type); - if (type & XFS_IOLOCK_EXCL) - mutex_unlock(&VFS_I(ip)->i_mutex); -} - -static inline void -xfs_rw_ilock_demote( - struct xfs_inode *ip, - int type) -{ - xfs_ilock_demote(ip, type); - if (type & XFS_IOLOCK_EXCL) - mutex_unlock(&VFS_I(ip)->i_mutex); -} - -/* - * xfs_iozero - * - * xfs_iozero clears the specified range of buffer supplied, - * and marks all the affected blocks as valid and modified. If - * an affected block is not allocated, it will be allocated. If - * an affected block is not completely overwritten, and is not - * valid before the operation, it will be read from disk before - * being partially zeroed. - */ -STATIC int -xfs_iozero( - struct xfs_inode *ip, /* inode */ - loff_t pos, /* offset in file */ - size_t count) /* size of data to zero */ -{ - struct page *page; - struct address_space *mapping; - int status; - - mapping = VFS_I(ip)->i_mapping; - do { - unsigned offset, bytes; - void *fsdata; - - offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ - bytes = PAGE_CACHE_SIZE - offset; - if (bytes > count) - bytes = count; - - status = pagecache_write_begin(NULL, mapping, pos, bytes, - AOP_FLAG_UNINTERRUPTIBLE, - &page, &fsdata); - if (status) - break; - - zero_user(page, offset, bytes); - - status = pagecache_write_end(NULL, mapping, pos, bytes, bytes, - page, fsdata); - WARN_ON(status <= 0); /* can't return less than zero! */ - pos += bytes; - count -= bytes; - status = 0; - } while (count); - - return (-status); -} - -STATIC int -xfs_file_fsync( - struct file *file, - loff_t start, - loff_t end, - int datasync) -{ - struct inode *inode = file->f_mapping->host; - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - struct xfs_trans *tp; - int error = 0; - int log_flushed = 0; - - trace_xfs_file_fsync(ip); - - error = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (error) - return error; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); - - xfs_iflags_clear(ip, XFS_ITRUNCATED); - - xfs_ilock(ip, XFS_IOLOCK_SHARED); - xfs_ioend_wait(ip); - xfs_iunlock(ip, XFS_IOLOCK_SHARED); - - if (mp->m_flags & XFS_MOUNT_BARRIER) { - /* - * If we have an RT and/or log subvolume we need to make sure - * to flush the write cache the device used for file data - * first. This is to ensure newly written file data make - * it to disk before logging the new inode size in case of - * an extending write. - */ - if (XFS_IS_REALTIME_INODE(ip)) - xfs_blkdev_issue_flush(mp->m_rtdev_targp); - else if (mp->m_logdev_targp != mp->m_ddev_targp) - xfs_blkdev_issue_flush(mp->m_ddev_targp); - } - - /* - * We always need to make sure that the required inode state is safe on - * disk. The inode might be clean but we still might need to force the - * log because of committed transactions that haven't hit the disk yet. - * Likewise, there could be unflushed non-transactional changes to the - * inode core that have to go to disk and this requires us to issue - * a synchronous transaction to capture these changes correctly. - * - * This code relies on the assumption that if the i_update_core field - * of the inode is clear and the inode is unpinned then it is clean - * and no action is required. - */ - xfs_ilock(ip, XFS_ILOCK_SHARED); - - /* - * First check if the VFS inode is marked dirty. All the dirtying - * of non-transactional updates no goes through mark_inode_dirty*, - * which allows us to distinguish beteeen pure timestamp updates - * and i_size updates which need to be caught for fdatasync. - * After that also theck for the dirty state in the XFS inode, which - * might gets cleared when the inode gets written out via the AIL - * or xfs_iflush_cluster. - */ - if (((inode->i_state & I_DIRTY_DATASYNC) || - ((inode->i_state & I_DIRTY_SYNC) && !datasync)) && - ip->i_update_core) { - /* - * Kick off a transaction to log the inode core to get the - * updates. The sync transaction will also force the log. - */ - xfs_iunlock(ip, XFS_ILOCK_SHARED); - tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); - error = xfs_trans_reserve(tp, 0, - XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - return -error; - } - xfs_ilock(ip, XFS_ILOCK_EXCL); - - /* - * Note - it's possible that we might have pushed ourselves out - * of the way during trans_reserve which would flush the inode. - * But there's no guarantee that the inode buffer has actually - * gone out yet (it's delwri). Plus the buffer could be pinned - * anyway if it's part of an inode in another recent - * transaction. So we play it safe and fire off the - * transaction anyway. - */ - xfs_trans_ijoin(tp, ip); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_trans_set_sync(tp); - error = _xfs_trans_commit(tp, 0, &log_flushed); - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } else { - /* - * Timestamps/size haven't changed since last inode flush or - * inode transaction commit. That means either nothing got - * written or a transaction committed which caught the updates. - * If the latter happened and the transaction hasn't hit the - * disk yet, the inode will be still be pinned. If it is, - * force the log. - */ - if (xfs_ipincount(ip)) { - error = _xfs_log_force_lsn(mp, - ip->i_itemp->ili_last_lsn, - XFS_LOG_SYNC, &log_flushed); - } - xfs_iunlock(ip, XFS_ILOCK_SHARED); - } - - /* - * If we only have a single device, and the log force about was - * a no-op we might have to flush the data device cache here. - * This can only happen for fdatasync/O_DSYNC if we were overwriting - * an already allocated file and thus do not have any metadata to - * commit. - */ - if ((mp->m_flags & XFS_MOUNT_BARRIER) && - mp->m_logdev_targp == mp->m_ddev_targp && - !XFS_IS_REALTIME_INODE(ip) && - !log_flushed) - xfs_blkdev_issue_flush(mp->m_ddev_targp); - - return -error; -} - -STATIC ssize_t -xfs_file_aio_read( - struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - size_t size = 0; - ssize_t ret = 0; - int ioflags = 0; - xfs_fsize_t n; - unsigned long seg; - - XFS_STATS_INC(xs_read_calls); - - BUG_ON(iocb->ki_pos != pos); - - if (unlikely(file->f_flags & O_DIRECT)) - ioflags |= IO_ISDIRECT; - if (file->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; - - /* START copy & waste from filemap.c */ - for (seg = 0; seg < nr_segs; seg++) { - const struct iovec *iv = &iovp[seg]; - - /* - * If any segment has a negative length, or the cumulative - * length ever wraps negative then return -EINVAL. - */ - size += iv->iov_len; - if (unlikely((ssize_t)(size|iv->iov_len) < 0)) - return XFS_ERROR(-EINVAL); - } - /* END copy & waste from filemap.c */ - - if (unlikely(ioflags & IO_ISDIRECT)) { - xfs_buftarg_t *target = - XFS_IS_REALTIME_INODE(ip) ? - mp->m_rtdev_targp : mp->m_ddev_targp; - if ((iocb->ki_pos & target->bt_smask) || - (size & target->bt_smask)) { - if (iocb->ki_pos == ip->i_size) - return 0; - return -XFS_ERROR(EINVAL); - } - } - - n = XFS_MAXIOFFSET(mp) - iocb->ki_pos; - if (n <= 0 || size == 0) - return 0; - - if (n < size) - size = n; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - - if (unlikely(ioflags & IO_ISDIRECT)) { - xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); - - if (inode->i_mapping->nrpages) { - ret = -xfs_flushinval_pages(ip, - (iocb->ki_pos & PAGE_CACHE_MASK), - -1, FI_REMAPF_LOCKED); - if (ret) { - xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); - return ret; - } - } - xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); - } else - xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); - - trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); - - ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos); - if (ret > 0) - XFS_STATS_ADD(xs_read_bytes, ret); - - xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); - return ret; -} - -STATIC ssize_t -xfs_file_splice_read( - struct file *infilp, - loff_t *ppos, - struct pipe_inode_info *pipe, - size_t count, - unsigned int flags) -{ - struct xfs_inode *ip = XFS_I(infilp->f_mapping->host); - int ioflags = 0; - ssize_t ret; - - XFS_STATS_INC(xs_read_calls); - - if (infilp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return -EIO; - - xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); - - trace_xfs_file_splice_read(ip, count, *ppos, ioflags); - - ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); - if (ret > 0) - XFS_STATS_ADD(xs_read_bytes, ret); - - xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); - return ret; -} - -STATIC void -xfs_aio_write_isize_update( - struct inode *inode, - loff_t *ppos, - ssize_t bytes_written) -{ - struct xfs_inode *ip = XFS_I(inode); - xfs_fsize_t isize = i_size_read(inode); - - if (bytes_written > 0) - XFS_STATS_ADD(xs_write_bytes, bytes_written); - - if (unlikely(bytes_written < 0 && bytes_written != -EFAULT && - *ppos > isize)) - *ppos = isize; - - if (*ppos > ip->i_size) { - xfs_rw_ilock(ip, XFS_ILOCK_EXCL); - if (*ppos > ip->i_size) - ip->i_size = *ppos; - xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); - } -} - -/* - * If this was a direct or synchronous I/O that failed (such as ENOSPC) then - * part of the I/O may have been written to disk before the error occurred. In - * this case the on-disk file size may have been adjusted beyond the in-memory - * file size and now needs to be truncated back. - */ -STATIC void -xfs_aio_write_newsize_update( - struct xfs_inode *ip) -{ - if (ip->i_new_size) { - xfs_rw_ilock(ip, XFS_ILOCK_EXCL); - ip->i_new_size = 0; - if (ip->i_d.di_size > ip->i_size) - ip->i_d.di_size = ip->i_size; - xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); - } -} - -/* - * xfs_file_splice_write() does not use xfs_rw_ilock() because - * generic_file_splice_write() takes the i_mutex itself. This, in theory, - * couuld cause lock inversions between the aio_write path and the splice path - * if someone is doing concurrent splice(2) based writes and write(2) based - * writes to the same inode. The only real way to fix this is to re-implement - * the generic code here with correct locking orders. - */ -STATIC ssize_t -xfs_file_splice_write( - struct pipe_inode_info *pipe, - struct file *outfilp, - loff_t *ppos, - size_t count, - unsigned int flags) -{ - struct inode *inode = outfilp->f_mapping->host; - struct xfs_inode *ip = XFS_I(inode); - xfs_fsize_t new_size; - int ioflags = 0; - ssize_t ret; - - XFS_STATS_INC(xs_write_calls); - - if (outfilp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return -EIO; - - xfs_ilock(ip, XFS_IOLOCK_EXCL); - - new_size = *ppos + count; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - if (new_size > ip->i_size) - ip->i_new_size = new_size; - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - trace_xfs_file_splice_write(ip, count, *ppos, ioflags); - - ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); - - xfs_aio_write_isize_update(inode, ppos, ret); - xfs_aio_write_newsize_update(ip); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return ret; -} - -/* - * This routine is called to handle zeroing any space in the last - * block of the file that is beyond the EOF. We do this since the - * size is being increased without writing anything to that block - * and we don't want anyone to read the garbage on the disk. - */ -STATIC int /* error (positive) */ -xfs_zero_last_block( - xfs_inode_t *ip, - xfs_fsize_t offset, - xfs_fsize_t isize) -{ - xfs_fileoff_t last_fsb; - xfs_mount_t *mp = ip->i_mount; - int nimaps; - int zero_offset; - int zero_len; - int error = 0; - xfs_bmbt_irec_t imap; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - - zero_offset = XFS_B_FSB_OFFSET(mp, isize); - if (zero_offset == 0) { - /* - * There are no extra bytes in the last block on disk to - * zero, so return. - */ - return 0; - } - - last_fsb = XFS_B_TO_FSBT(mp, isize); - nimaps = 1; - error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap, - &nimaps, NULL); - if (error) { - return error; - } - ASSERT(nimaps > 0); - /* - * If the block underlying isize is just a hole, then there - * is nothing to zero. - */ - if (imap.br_startblock == HOLESTARTBLOCK) { - return 0; - } - /* - * Zero the part of the last block beyond the EOF, and write it - * out sync. We need to drop the ilock while we do this so we - * don't deadlock when the buffer cache calls back to us. - */ - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - zero_len = mp->m_sb.sb_blocksize - zero_offset; - if (isize + zero_len > offset) - zero_len = offset - isize; - error = xfs_iozero(ip, isize, zero_len); - - xfs_ilock(ip, XFS_ILOCK_EXCL); - ASSERT(error >= 0); - return error; -} - -/* - * Zero any on disk space between the current EOF and the new, - * larger EOF. This handles the normal case of zeroing the remainder - * of the last block in the file and the unusual case of zeroing blocks - * out beyond the size of the file. This second case only happens - * with fixed size extents and when the system crashes before the inode - * size was updated but after blocks were allocated. If fill is set, - * then any holes in the range are filled and zeroed. If not, the holes - * are left alone as holes. - */ - -int /* error (positive) */ -xfs_zero_eof( - xfs_inode_t *ip, - xfs_off_t offset, /* starting I/O offset */ - xfs_fsize_t isize) /* current inode size */ -{ - xfs_mount_t *mp = ip->i_mount; - xfs_fileoff_t start_zero_fsb; - xfs_fileoff_t end_zero_fsb; - xfs_fileoff_t zero_count_fsb; - xfs_fileoff_t last_fsb; - xfs_fileoff_t zero_off; - xfs_fsize_t zero_len; - int nimaps; - int error = 0; - xfs_bmbt_irec_t imap; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); - ASSERT(offset > isize); - - /* - * First handle zeroing the block on which isize resides. - * We only zero a part of that block so it is handled specially. - */ - error = xfs_zero_last_block(ip, offset, isize); - if (error) { - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); - return error; - } - - /* - * Calculate the range between the new size and the old - * where blocks needing to be zeroed may exist. To get the - * block where the last byte in the file currently resides, - * we need to subtract one from the size and truncate back - * to a block boundary. We subtract 1 in case the size is - * exactly on a block boundary. - */ - last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; - start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); - end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1); - ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb); - if (last_fsb == end_zero_fsb) { - /* - * The size was only incremented on its last block. - * We took care of that above, so just return. - */ - return 0; - } - - ASSERT(start_zero_fsb <= end_zero_fsb); - while (start_zero_fsb <= end_zero_fsb) { - nimaps = 1; - zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; - error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, - 0, NULL, 0, &imap, &nimaps, NULL); - if (error) { - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); - return error; - } - ASSERT(nimaps > 0); - - if (imap.br_state == XFS_EXT_UNWRITTEN || - imap.br_startblock == HOLESTARTBLOCK) { - /* - * This loop handles initializing pages that were - * partially initialized by the code below this - * loop. It basically zeroes the part of the page - * that sits on a hole and sets the page as P_HOLE - * and calls remapf if it is a mapped file. - */ - start_zero_fsb = imap.br_startoff + imap.br_blockcount; - ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); - continue; - } - - /* - * There are blocks we need to zero. - * Drop the inode lock while we're doing the I/O. - * We'll still have the iolock to protect us. - */ - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); - zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); - - if ((zero_off + zero_len) > offset) - zero_len = offset - zero_off; - - error = xfs_iozero(ip, zero_off, zero_len); - if (error) { - goto out_lock; - } - - start_zero_fsb = imap.br_startoff + imap.br_blockcount; - ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); - - xfs_ilock(ip, XFS_ILOCK_EXCL); - } - - return 0; - -out_lock: - xfs_ilock(ip, XFS_ILOCK_EXCL); - ASSERT(error >= 0); - return error; -} - -/* - * Common pre-write limit and setup checks. - * - * Returns with iolock held according to @iolock. - */ -STATIC ssize_t -xfs_file_aio_write_checks( - struct file *file, - loff_t *pos, - size_t *count, - int *iolock) -{ - struct inode *inode = file->f_mapping->host; - struct xfs_inode *ip = XFS_I(inode); - xfs_fsize_t new_size; - int error = 0; - - error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); - if (error) { - xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); - *iolock = 0; - return error; - } - - new_size = *pos + *count; - if (new_size > ip->i_size) - ip->i_new_size = new_size; - - if (likely(!(file->f_mode & FMODE_NOCMTIME))) - file_update_time(file); - - /* - * If the offset is beyond the size of the file, we need to zero any - * blocks that fall between the existing EOF and the start of this - * write. - */ - if (*pos > ip->i_size) - error = -xfs_zero_eof(ip, *pos, ip->i_size); - - xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); - if (error) - return error; - - /* - * If we're writing the file then make sure to clear the setuid and - * setgid bits if the process is not being run by root. This keeps - * people from modifying setuid and setgid binaries. - */ - return file_remove_suid(file); - -} - -/* - * xfs_file_dio_aio_write - handle direct IO writes - * - * Lock the inode appropriately to prepare for and issue a direct IO write. - * By separating it from the buffered write path we remove all the tricky to - * follow locking changes and looping. - * - * If there are cached pages or we're extending the file, we need IOLOCK_EXCL - * until we're sure the bytes at the new EOF have been zeroed and/or the cached - * pages are flushed out. - * - * In most cases the direct IO writes will be done holding IOLOCK_SHARED - * allowing them to be done in parallel with reads and other direct IO writes. - * However, if the IO is not aligned to filesystem blocks, the direct IO layer - * needs to do sub-block zeroing and that requires serialisation against other - * direct IOs to the same block. In this case we need to serialise the - * submission of the unaligned IOs so that we don't get racing block zeroing in - * the dio layer. To avoid the problem with aio, we also need to wait for - * outstanding IOs to complete so that unwritten extent conversion is completed - * before we try to map the overlapping block. This is currently implemented by - * hitting it with a big hammer (i.e. xfs_ioend_wait()). - * - * Returns with locks held indicated by @iolock and errors indicated by - * negative return values. - */ -STATIC ssize_t -xfs_file_dio_aio_write( - struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos, - size_t ocount, - int *iolock) -{ - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - ssize_t ret = 0; - size_t count = ocount; - int unaligned_io = 0; - struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? - mp->m_rtdev_targp : mp->m_ddev_targp; - - *iolock = 0; - if ((pos & target->bt_smask) || (count & target->bt_smask)) - return -XFS_ERROR(EINVAL); - - if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) - unaligned_io = 1; - - if (unaligned_io || mapping->nrpages || pos > ip->i_size) - *iolock = XFS_IOLOCK_EXCL; - else - *iolock = XFS_IOLOCK_SHARED; - xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); - - ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); - if (ret) - return ret; - - if (mapping->nrpages) { - WARN_ON(*iolock != XFS_IOLOCK_EXCL); - ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, - FI_REMAPF_LOCKED); - if (ret) - return ret; - } - - /* - * If we are doing unaligned IO, wait for all other IO to drain, - * otherwise demote the lock if we had to flush cached pages - */ - if (unaligned_io) - xfs_ioend_wait(ip); - else if (*iolock == XFS_IOLOCK_EXCL) { - xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); - *iolock = XFS_IOLOCK_SHARED; - } - - trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); - ret = generic_file_direct_write(iocb, iovp, - &nr_segs, pos, &iocb->ki_pos, count, ocount); - - /* No fallback to buffered IO on errors for XFS. */ - ASSERT(ret < 0 || ret == count); - return ret; -} - -STATIC ssize_t -xfs_file_buffered_aio_write( - struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos, - size_t ocount, - int *iolock) -{ - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - struct xfs_inode *ip = XFS_I(inode); - ssize_t ret; - int enospc = 0; - size_t count = ocount; - - *iolock = XFS_IOLOCK_EXCL; - xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); - - ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); - if (ret) - return ret; - - /* We can write back this queue in page reclaim */ - current->backing_dev_info = mapping->backing_dev_info; - -write_retry: - trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); - ret = generic_file_buffered_write(iocb, iovp, nr_segs, - pos, &iocb->ki_pos, count, ret); - /* - * if we just got an ENOSPC, flush the inode now we aren't holding any - * page locks and retry *once* - */ - if (ret == -ENOSPC && !enospc) { - ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE); - if (ret) - return ret; - enospc = 1; - goto write_retry; - } - current->backing_dev_info = NULL; - return ret; -} - -STATIC ssize_t -xfs_file_aio_write( - struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos) -{ - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - struct xfs_inode *ip = XFS_I(inode); - ssize_t ret; - int iolock; - size_t ocount = 0; - - XFS_STATS_INC(xs_write_calls); - - BUG_ON(iocb->ki_pos != pos); - - ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); - if (ret) - return ret; - - if (ocount == 0) - return 0; - - xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE); - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return -EIO; - - if (unlikely(file->f_flags & O_DIRECT)) - ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, - ocount, &iolock); - else - ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, - ocount, &iolock); - - xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret); - - if (ret <= 0) - goto out_unlock; - - /* Handle various SYNC-type writes */ - if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { - loff_t end = pos + ret - 1; - int error; - - xfs_rw_iunlock(ip, iolock); - error = xfs_file_fsync(file, pos, end, - (file->f_flags & __O_SYNC) ? 0 : 1); - xfs_rw_ilock(ip, iolock); - if (error) - ret = error; - } - -out_unlock: - xfs_aio_write_newsize_update(ip); - xfs_rw_iunlock(ip, iolock); - return ret; -} - -STATIC long -xfs_file_fallocate( - struct file *file, - int mode, - loff_t offset, - loff_t len) -{ - struct inode *inode = file->f_path.dentry->d_inode; - long error; - loff_t new_size = 0; - xfs_flock64_t bf; - xfs_inode_t *ip = XFS_I(inode); - int cmd = XFS_IOC_RESVSP; - int attr_flags = XFS_ATTR_NOLOCK; - - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) - return -EOPNOTSUPP; - - bf.l_whence = 0; - bf.l_start = offset; - bf.l_len = len; - - xfs_ilock(ip, XFS_IOLOCK_EXCL); - - if (mode & FALLOC_FL_PUNCH_HOLE) - cmd = XFS_IOC_UNRESVSP; - - /* check the new inode size is valid before allocating */ - if (!(mode & FALLOC_FL_KEEP_SIZE) && - offset + len > i_size_read(inode)) { - new_size = offset + len; - error = inode_newsize_ok(inode, new_size); - if (error) - goto out_unlock; - } - - if (file->f_flags & O_DSYNC) - attr_flags |= XFS_ATTR_SYNC; - - error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags); - if (error) - goto out_unlock; - - /* Change file size if needed */ - if (new_size) { - struct iattr iattr; - - iattr.ia_valid = ATTR_SIZE; - iattr.ia_size = new_size; - error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK); - } - -out_unlock: - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return error; -} - - -STATIC int -xfs_file_open( - struct inode *inode, - struct file *file) -{ - if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) - return -EFBIG; - if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb))) - return -EIO; - return 0; -} - -STATIC int -xfs_dir_open( - struct inode *inode, - struct file *file) -{ - struct xfs_inode *ip = XFS_I(inode); - int mode; - int error; - - error = xfs_file_open(inode, file); - if (error) - return error; - - /* - * If there are any blocks, read-ahead block 0 as we're almost - * certain to have the next operation be a read there. - */ - mode = xfs_ilock_map_shared(ip); - if (ip->i_d.di_nextents > 0) - xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); - xfs_iunlock(ip, mode); - return 0; -} - -STATIC int -xfs_file_release( - struct inode *inode, - struct file *filp) -{ - return -xfs_release(XFS_I(inode)); -} - -STATIC int -xfs_file_readdir( - struct file *filp, - void *dirent, - filldir_t filldir) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - xfs_inode_t *ip = XFS_I(inode); - int error; - size_t bufsize; - - /* - * The Linux API doesn't pass down the total size of the buffer - * we read into down to the filesystem. With the filldir concept - * it's not needed for correct information, but the XFS dir2 leaf - * code wants an estimate of the buffer size to calculate it's - * readahead window and size the buffers used for mapping to - * physical blocks. - * - * Try to give it an estimate that's good enough, maybe at some - * point we can change the ->readdir prototype to include the - * buffer size. For now we use the current glibc buffer size. - */ - bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size); - - error = xfs_readdir(ip, dirent, bufsize, - (xfs_off_t *)&filp->f_pos, filldir); - if (error) - return -error; - return 0; -} - -STATIC int -xfs_file_mmap( - struct file *filp, - struct vm_area_struct *vma) -{ - vma->vm_ops = &xfs_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; - - file_accessed(filp); - return 0; -} - -/* - * mmap()d file has taken write protection fault and is being made - * writable. We can set the page state up correctly for a writable - * page, which means we can do correct delalloc accounting (ENOSPC - * checking!) and unwritten extent mapping. - */ -STATIC int -xfs_vm_page_mkwrite( - struct vm_area_struct *vma, - struct vm_fault *vmf) -{ - return block_page_mkwrite(vma, vmf, xfs_get_blocks); -} - -const struct file_operations xfs_file_operations = { - .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = xfs_file_aio_read, - .aio_write = xfs_file_aio_write, - .splice_read = xfs_file_splice_read, - .splice_write = xfs_file_splice_write, - .unlocked_ioctl = xfs_file_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = xfs_file_compat_ioctl, -#endif - .mmap = xfs_file_mmap, - .open = xfs_file_open, - .release = xfs_file_release, - .fsync = xfs_file_fsync, - .fallocate = xfs_file_fallocate, -}; - -const struct file_operations xfs_dir_file_operations = { - .open = xfs_dir_open, - .read = generic_read_dir, - .readdir = xfs_file_readdir, - .llseek = generic_file_llseek, - .unlocked_ioctl = xfs_file_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = xfs_file_compat_ioctl, -#endif - .fsync = xfs_file_fsync, -}; - -static const struct vm_operations_struct xfs_file_vm_ops = { - .fault = filemap_fault, - .page_mkwrite = xfs_vm_page_mkwrite, -}; diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c deleted file mode 100644 index ed88ed1..0000000 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_vnodeops.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_trace.h" - -/* - * note: all filemap functions return negative error codes. These - * need to be inverted before returning to the xfs core functions. - */ -void -xfs_tosspages( - xfs_inode_t *ip, - xfs_off_t first, - xfs_off_t last, - int fiopt) -{ - /* can't toss partial tail pages, so mask them out */ - last &= ~(PAGE_SIZE - 1); - truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1); -} - -int -xfs_flushinval_pages( - xfs_inode_t *ip, - xfs_off_t first, - xfs_off_t last, - int fiopt) -{ - struct address_space *mapping = VFS_I(ip)->i_mapping; - int ret = 0; - - trace_xfs_pagecache_inval(ip, first, last); - - xfs_iflags_clear(ip, XFS_ITRUNCATED); - ret = filemap_write_and_wait_range(mapping, first, - last == -1 ? LLONG_MAX : last); - if (!ret) - truncate_inode_pages_range(mapping, first, last); - return -ret; -} - -int -xfs_flush_pages( - xfs_inode_t *ip, - xfs_off_t first, - xfs_off_t last, - uint64_t flags, - int fiopt) -{ - struct address_space *mapping = VFS_I(ip)->i_mapping; - int ret = 0; - int ret2; - - xfs_iflags_clear(ip, XFS_ITRUNCATED); - ret = -filemap_fdatawrite_range(mapping, first, - last == -1 ? LLONG_MAX : last); - if (flags & XBF_ASYNC) - return ret; - ret2 = xfs_wait_on_pages(ip, first, last); - if (!ret) - ret = ret2; - return ret; -} - -int -xfs_wait_on_pages( - xfs_inode_t *ip, - xfs_off_t first, - xfs_off_t last) -{ - struct address_space *mapping = VFS_I(ip)->i_mapping; - - if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) { - return -filemap_fdatawait_range(mapping, first, - last == -1 ? ip->i_size - 1 : last); - } - return 0; -} diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c deleted file mode 100644 index 76e81cf..0000000 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_sysctl.h" - -/* - * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n, - * other XFS code uses these values. Times are measured in centisecs (i.e. - * 100ths of a second). - */ -xfs_param_t xfs_params = { - /* MIN DFLT MAX */ - .sgid_inherit = { 0, 0, 1 }, - .symlink_mode = { 0, 0, 1 }, - .panic_mask = { 0, 0, 255 }, - .error_level = { 0, 3, 11 }, - .syncd_timer = { 1*100, 30*100, 7200*100}, - .stats_clear = { 0, 0, 1 }, - .inherit_sync = { 0, 1, 1 }, - .inherit_nodump = { 0, 1, 1 }, - .inherit_noatim = { 0, 1, 1 }, - .xfs_buf_timer = { 100/2, 1*100, 30*100 }, - .xfs_buf_age = { 1*100, 15*100, 7200*100}, - .inherit_nosym = { 0, 0, 1 }, - .rotorstep = { 1, 1, 255 }, - .inherit_nodfrg = { 0, 1, 1 }, - .fstrm_timer = { 1, 30*100, 3600*100}, -}; diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c deleted file mode 100644 index f7ce7de..0000000 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ /dev/null @@ -1,1556 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_ioctl.h" -#include "xfs_rtalloc.h" -#include "xfs_itable.h" -#include "xfs_error.h" -#include "xfs_attr.h" -#include "xfs_bmap.h" -#include "xfs_buf_item.h" -#include "xfs_utils.h" -#include "xfs_dfrag.h" -#include "xfs_fsops.h" -#include "xfs_vnodeops.h" -#include "xfs_discard.h" -#include "xfs_quota.h" -#include "xfs_inode_item.h" -#include "xfs_export.h" -#include "xfs_trace.h" - -#include -#include -#include -#include -#include -#include -#include - -/* - * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to - * a file or fs handle. - * - * XFS_IOC_PATH_TO_FSHANDLE - * returns fs handle for a mount point or path within that mount point - * XFS_IOC_FD_TO_HANDLE - * returns full handle for a FD opened in user space - * XFS_IOC_PATH_TO_HANDLE - * returns full handle for a path - */ -int -xfs_find_handle( - unsigned int cmd, - xfs_fsop_handlereq_t *hreq) -{ - int hsize; - xfs_handle_t handle; - struct inode *inode; - struct file *file = NULL; - struct path path; - int error; - struct xfs_inode *ip; - - if (cmd == XFS_IOC_FD_TO_HANDLE) { - file = fget(hreq->fd); - if (!file) - return -EBADF; - inode = file->f_path.dentry->d_inode; - } else { - error = user_lpath((const char __user *)hreq->path, &path); - if (error) - return error; - inode = path.dentry->d_inode; - } - ip = XFS_I(inode); - - /* - * We can only generate handles for inodes residing on a XFS filesystem, - * and only for regular files, directories or symbolic links. - */ - error = -EINVAL; - if (inode->i_sb->s_magic != XFS_SB_MAGIC) - goto out_put; - - error = -EBADF; - if (!S_ISREG(inode->i_mode) && - !S_ISDIR(inode->i_mode) && - !S_ISLNK(inode->i_mode)) - goto out_put; - - - memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t)); - - if (cmd == XFS_IOC_PATH_TO_FSHANDLE) { - /* - * This handle only contains an fsid, zero the rest. - */ - memset(&handle.ha_fid, 0, sizeof(handle.ha_fid)); - hsize = sizeof(xfs_fsid_t); - } else { - int lock_mode; - - lock_mode = xfs_ilock_map_shared(ip); - handle.ha_fid.fid_len = sizeof(xfs_fid_t) - - sizeof(handle.ha_fid.fid_len); - handle.ha_fid.fid_pad = 0; - handle.ha_fid.fid_gen = ip->i_d.di_gen; - handle.ha_fid.fid_ino = ip->i_ino; - xfs_iunlock_map_shared(ip, lock_mode); - - hsize = XFS_HSIZE(handle); - } - - error = -EFAULT; - if (copy_to_user(hreq->ohandle, &handle, hsize) || - copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) - goto out_put; - - error = 0; - - out_put: - if (cmd == XFS_IOC_FD_TO_HANDLE) - fput(file); - else - path_put(&path); - return error; -} - -/* - * No need to do permission checks on the various pathname components - * as the handle operations are privileged. - */ -STATIC int -xfs_handle_acceptable( - void *context, - struct dentry *dentry) -{ - return 1; -} - -/* - * Convert userspace handle data into a dentry. - */ -struct dentry * -xfs_handle_to_dentry( - struct file *parfilp, - void __user *uhandle, - u32 hlen) -{ - xfs_handle_t handle; - struct xfs_fid64 fid; - - /* - * Only allow handle opens under a directory. - */ - if (!S_ISDIR(parfilp->f_path.dentry->d_inode->i_mode)) - return ERR_PTR(-ENOTDIR); - - if (hlen != sizeof(xfs_handle_t)) - return ERR_PTR(-EINVAL); - if (copy_from_user(&handle, uhandle, hlen)) - return ERR_PTR(-EFAULT); - if (handle.ha_fid.fid_len != - sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len)) - return ERR_PTR(-EINVAL); - - memset(&fid, 0, sizeof(struct fid)); - fid.ino = handle.ha_fid.fid_ino; - fid.gen = handle.ha_fid.fid_gen; - - return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3, - FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG, - xfs_handle_acceptable, NULL); -} - -STATIC struct dentry * -xfs_handlereq_to_dentry( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq) -{ - return xfs_handle_to_dentry(parfilp, hreq->ihandle, hreq->ihandlen); -} - -int -xfs_open_by_handle( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq) -{ - const struct cred *cred = current_cred(); - int error; - int fd; - int permflag; - struct file *filp; - struct inode *inode; - struct dentry *dentry; - - if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); - - dentry = xfs_handlereq_to_dentry(parfilp, hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - inode = dentry->d_inode; - - /* Restrict xfs_open_by_handle to directories & regular files. */ - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) { - error = -XFS_ERROR(EPERM); - goto out_dput; - } - -#if BITS_PER_LONG != 32 - hreq->oflags |= O_LARGEFILE; -#endif - - /* Put open permission in namei format. */ - permflag = hreq->oflags; - if ((permflag+1) & O_ACCMODE) - permflag++; - if (permflag & O_TRUNC) - permflag |= 2; - - if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && - (permflag & FMODE_WRITE) && IS_APPEND(inode)) { - error = -XFS_ERROR(EPERM); - goto out_dput; - } - - if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) { - error = -XFS_ERROR(EACCES); - goto out_dput; - } - - /* Can't write directories. */ - if (S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) { - error = -XFS_ERROR(EISDIR); - goto out_dput; - } - - fd = get_unused_fd(); - if (fd < 0) { - error = fd; - goto out_dput; - } - - filp = dentry_open(dentry, mntget(parfilp->f_path.mnt), - hreq->oflags, cred); - if (IS_ERR(filp)) { - put_unused_fd(fd); - return PTR_ERR(filp); - } - - if (S_ISREG(inode->i_mode)) { - filp->f_flags |= O_NOATIME; - filp->f_mode |= FMODE_NOCMTIME; - } - - fd_install(fd, filp); - return fd; - - out_dput: - dput(dentry); - return error; -} - -/* - * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's - * unused first argument. - */ -STATIC int -do_readlink( - char __user *buffer, - int buflen, - const char *link) -{ - int len; - - len = PTR_ERR(link); - if (IS_ERR(link)) - goto out; - - len = strlen(link); - if (len > (unsigned) buflen) - len = buflen; - if (copy_to_user(buffer, link, len)) - len = -EFAULT; - out: - return len; -} - - -int -xfs_readlink_by_handle( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq) -{ - struct dentry *dentry; - __u32 olen; - void *link; - int error; - - if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); - - dentry = xfs_handlereq_to_dentry(parfilp, hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - /* Restrict this handle operation to symlinks only. */ - if (!S_ISLNK(dentry->d_inode->i_mode)) { - error = -XFS_ERROR(EINVAL); - goto out_dput; - } - - if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) { - error = -XFS_ERROR(EFAULT); - goto out_dput; - } - - link = kmalloc(MAXPATHLEN+1, GFP_KERNEL); - if (!link) { - error = -XFS_ERROR(ENOMEM); - goto out_dput; - } - - error = -xfs_readlink(XFS_I(dentry->d_inode), link); - if (error) - goto out_kfree; - error = do_readlink(hreq->ohandle, olen, link); - if (error) - goto out_kfree; - - out_kfree: - kfree(link); - out_dput: - dput(dentry); - return error; -} - -STATIC int -xfs_fssetdm_by_handle( - struct file *parfilp, - void __user *arg) -{ - int error; - struct fsdmidata fsd; - xfs_fsop_setdm_handlereq_t dmhreq; - struct dentry *dentry; - - if (!capable(CAP_MKNOD)) - return -XFS_ERROR(EPERM); - if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) - return -XFS_ERROR(EFAULT); - - dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { - error = -XFS_ERROR(EPERM); - goto out; - } - - if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) { - error = -XFS_ERROR(EFAULT); - goto out; - } - - error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, - fsd.fsd_dmstate); - - out: - dput(dentry); - return error; -} - -STATIC int -xfs_attrlist_by_handle( - struct file *parfilp, - void __user *arg) -{ - int error = -ENOMEM; - attrlist_cursor_kern_t *cursor; - xfs_fsop_attrlist_handlereq_t al_hreq; - struct dentry *dentry; - char *kbuf; - - if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); - if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) - return -XFS_ERROR(EFAULT); - if (al_hreq.buflen > XATTR_LIST_MAX) - return -XFS_ERROR(EINVAL); - - /* - * Reject flags, only allow namespaces. - */ - if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) - return -XFS_ERROR(EINVAL); - - dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL); - if (!kbuf) - goto out_dput; - - cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; - error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, - al_hreq.flags, cursor); - if (error) - goto out_kfree; - - if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen)) - error = -EFAULT; - - out_kfree: - kfree(kbuf); - out_dput: - dput(dentry); - return error; -} - -int -xfs_attrmulti_attr_get( - struct inode *inode, - unsigned char *name, - unsigned char __user *ubuf, - __uint32_t *len, - __uint32_t flags) -{ - unsigned char *kbuf; - int error = EFAULT; - - if (*len > XATTR_SIZE_MAX) - return EINVAL; - kbuf = kmalloc(*len, GFP_KERNEL); - if (!kbuf) - return ENOMEM; - - error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags); - if (error) - goto out_kfree; - - if (copy_to_user(ubuf, kbuf, *len)) - error = EFAULT; - - out_kfree: - kfree(kbuf); - return error; -} - -int -xfs_attrmulti_attr_set( - struct inode *inode, - unsigned char *name, - const unsigned char __user *ubuf, - __uint32_t len, - __uint32_t flags) -{ - unsigned char *kbuf; - int error = EFAULT; - - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return EPERM; - if (len > XATTR_SIZE_MAX) - return EINVAL; - - kbuf = memdup_user(ubuf, len); - if (IS_ERR(kbuf)) - return PTR_ERR(kbuf); - - error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); - - return error; -} - -int -xfs_attrmulti_attr_remove( - struct inode *inode, - unsigned char *name, - __uint32_t flags) -{ - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return EPERM; - return xfs_attr_remove(XFS_I(inode), name, flags); -} - -STATIC int -xfs_attrmulti_by_handle( - struct file *parfilp, - void __user *arg) -{ - int error; - xfs_attr_multiop_t *ops; - xfs_fsop_attrmulti_handlereq_t am_hreq; - struct dentry *dentry; - unsigned int i, size; - unsigned char *attr_name; - - if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); - if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t))) - return -XFS_ERROR(EFAULT); - - /* overflow check */ - if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t)) - return -E2BIG; - - dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - error = E2BIG; - size = am_hreq.opcount * sizeof(xfs_attr_multiop_t); - if (!size || size > 16 * PAGE_SIZE) - goto out_dput; - - ops = memdup_user(am_hreq.ops, size); - if (IS_ERR(ops)) { - error = PTR_ERR(ops); - goto out_dput; - } - - attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); - if (!attr_name) - goto out_kfree_ops; - - error = 0; - for (i = 0; i < am_hreq.opcount; i++) { - ops[i].am_error = strncpy_from_user((char *)attr_name, - ops[i].am_attrname, MAXNAMELEN); - if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) - error = -ERANGE; - if (ops[i].am_error < 0) - break; - - switch (ops[i].am_opcode) { - case ATTR_OP_GET: - ops[i].am_error = xfs_attrmulti_attr_get( - dentry->d_inode, attr_name, - ops[i].am_attrvalue, &ops[i].am_length, - ops[i].am_flags); - break; - case ATTR_OP_SET: - ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); - if (ops[i].am_error) - break; - ops[i].am_error = xfs_attrmulti_attr_set( - dentry->d_inode, attr_name, - ops[i].am_attrvalue, ops[i].am_length, - ops[i].am_flags); - mnt_drop_write(parfilp->f_path.mnt); - break; - case ATTR_OP_REMOVE: - ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); - if (ops[i].am_error) - break; - ops[i].am_error = xfs_attrmulti_attr_remove( - dentry->d_inode, attr_name, - ops[i].am_flags); - mnt_drop_write(parfilp->f_path.mnt); - break; - default: - ops[i].am_error = EINVAL; - } - } - - if (copy_to_user(am_hreq.ops, ops, size)) - error = XFS_ERROR(EFAULT); - - kfree(attr_name); - out_kfree_ops: - kfree(ops); - out_dput: - dput(dentry); - return -error; -} - -int -xfs_ioc_space( - struct xfs_inode *ip, - struct inode *inode, - struct file *filp, - int ioflags, - unsigned int cmd, - xfs_flock64_t *bf) -{ - int attr_flags = 0; - int error; - - /* - * Only allow the sys admin to reserve space unless - * unwritten extents are enabled. - */ - if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) && - !capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); - - if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) - return -XFS_ERROR(EPERM); - - if (!(filp->f_mode & FMODE_WRITE)) - return -XFS_ERROR(EBADF); - - if (!S_ISREG(inode->i_mode)) - return -XFS_ERROR(EINVAL); - - if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) - attr_flags |= XFS_ATTR_NONBLOCK; - - if (filp->f_flags & O_DSYNC) - attr_flags |= XFS_ATTR_SYNC; - - if (ioflags & IO_INVIS) - attr_flags |= XFS_ATTR_DMI; - - error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags); - return -error; -} - -STATIC int -xfs_ioc_bulkstat( - xfs_mount_t *mp, - unsigned int cmd, - void __user *arg) -{ - xfs_fsop_bulkreq_t bulkreq; - int count; /* # of records returned */ - xfs_ino_t inlast; /* last inode number */ - int done; - int error; - - /* done = 1 if there are more stats to get and if bulkstat */ - /* should be called again (unused here, but used in dmapi) */ - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); - - if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) - return -XFS_ERROR(EFAULT); - - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) - return -XFS_ERROR(EFAULT); - - if ((count = bulkreq.icount) <= 0) - return -XFS_ERROR(EINVAL); - - if (bulkreq.ubuffer == NULL) - return -XFS_ERROR(EINVAL); - - if (cmd == XFS_IOC_FSINUMBERS) - error = xfs_inumbers(mp, &inlast, &count, - bulkreq.ubuffer, xfs_inumbers_fmt); - else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) - error = xfs_bulkstat_single(mp, &inlast, - bulkreq.ubuffer, &done); - else /* XFS_IOC_FSBULKSTAT */ - error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, - sizeof(xfs_bstat_t), bulkreq.ubuffer, - &done); - - if (error) - return -error; - - if (bulkreq.ocount != NULL) { - if (copy_to_user(bulkreq.lastip, &inlast, - sizeof(xfs_ino_t))) - return -XFS_ERROR(EFAULT); - - if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) - return -XFS_ERROR(EFAULT); - } - - return 0; -} - -STATIC int -xfs_ioc_fsgeometry_v1( - xfs_mount_t *mp, - void __user *arg) -{ - xfs_fsop_geom_t fsgeo; - int error; - - error = xfs_fs_geometry(mp, &fsgeo, 3); - if (error) - return -error; - - /* - * Caller should have passed an argument of type - * xfs_fsop_geom_v1_t. This is a proper subset of the - * xfs_fsop_geom_t that xfs_fs_geometry() fills in. - */ - if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t))) - return -XFS_ERROR(EFAULT); - return 0; -} - -STATIC int -xfs_ioc_fsgeometry( - xfs_mount_t *mp, - void __user *arg) -{ - xfs_fsop_geom_t fsgeo; - int error; - - error = xfs_fs_geometry(mp, &fsgeo, 4); - if (error) - return -error; - - if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) - return -XFS_ERROR(EFAULT); - return 0; -} - -/* - * Linux extended inode flags interface. - */ - -STATIC unsigned int -xfs_merge_ioc_xflags( - unsigned int flags, - unsigned int start) -{ - unsigned int xflags = start; - - if (flags & FS_IMMUTABLE_FL) - xflags |= XFS_XFLAG_IMMUTABLE; - else - xflags &= ~XFS_XFLAG_IMMUTABLE; - if (flags & FS_APPEND_FL) - xflags |= XFS_XFLAG_APPEND; - else - xflags &= ~XFS_XFLAG_APPEND; - if (flags & FS_SYNC_FL) - xflags |= XFS_XFLAG_SYNC; - else - xflags &= ~XFS_XFLAG_SYNC; - if (flags & FS_NOATIME_FL) - xflags |= XFS_XFLAG_NOATIME; - else - xflags &= ~XFS_XFLAG_NOATIME; - if (flags & FS_NODUMP_FL) - xflags |= XFS_XFLAG_NODUMP; - else - xflags &= ~XFS_XFLAG_NODUMP; - - return xflags; -} - -STATIC unsigned int -xfs_di2lxflags( - __uint16_t di_flags) -{ - unsigned int flags = 0; - - if (di_flags & XFS_DIFLAG_IMMUTABLE) - flags |= FS_IMMUTABLE_FL; - if (di_flags & XFS_DIFLAG_APPEND) - flags |= FS_APPEND_FL; - if (di_flags & XFS_DIFLAG_SYNC) - flags |= FS_SYNC_FL; - if (di_flags & XFS_DIFLAG_NOATIME) - flags |= FS_NOATIME_FL; - if (di_flags & XFS_DIFLAG_NODUMP) - flags |= FS_NODUMP_FL; - return flags; -} - -STATIC int -xfs_ioc_fsgetxattr( - xfs_inode_t *ip, - int attr, - void __user *arg) -{ - struct fsxattr fa; - - memset(&fa, 0, sizeof(struct fsxattr)); - - xfs_ilock(ip, XFS_ILOCK_SHARED); - fa.fsx_xflags = xfs_ip2xflags(ip); - fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; - fa.fsx_projid = xfs_get_projid(ip); - - if (attr) { - if (ip->i_afp) { - if (ip->i_afp->if_flags & XFS_IFEXTENTS) - fa.fsx_nextents = ip->i_afp->if_bytes / - sizeof(xfs_bmbt_rec_t); - else - fa.fsx_nextents = ip->i_d.di_anextents; - } else - fa.fsx_nextents = 0; - } else { - if (ip->i_df.if_flags & XFS_IFEXTENTS) - fa.fsx_nextents = ip->i_df.if_bytes / - sizeof(xfs_bmbt_rec_t); - else - fa.fsx_nextents = ip->i_d.di_nextents; - } - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - if (copy_to_user(arg, &fa, sizeof(fa))) - return -EFAULT; - return 0; -} - -STATIC void -xfs_set_diflags( - struct xfs_inode *ip, - unsigned int xflags) -{ - unsigned int di_flags; - - /* can't set PREALLOC this way, just preserve it */ - di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); - if (xflags & XFS_XFLAG_IMMUTABLE) - di_flags |= XFS_DIFLAG_IMMUTABLE; - if (xflags & XFS_XFLAG_APPEND) - di_flags |= XFS_DIFLAG_APPEND; - if (xflags & XFS_XFLAG_SYNC) - di_flags |= XFS_DIFLAG_SYNC; - if (xflags & XFS_XFLAG_NOATIME) - di_flags |= XFS_DIFLAG_NOATIME; - if (xflags & XFS_XFLAG_NODUMP) - di_flags |= XFS_DIFLAG_NODUMP; - if (xflags & XFS_XFLAG_PROJINHERIT) - di_flags |= XFS_DIFLAG_PROJINHERIT; - if (xflags & XFS_XFLAG_NODEFRAG) - di_flags |= XFS_DIFLAG_NODEFRAG; - if (xflags & XFS_XFLAG_FILESTREAM) - di_flags |= XFS_DIFLAG_FILESTREAM; - if (S_ISDIR(ip->i_d.di_mode)) { - if (xflags & XFS_XFLAG_RTINHERIT) - di_flags |= XFS_DIFLAG_RTINHERIT; - if (xflags & XFS_XFLAG_NOSYMLINKS) - di_flags |= XFS_DIFLAG_NOSYMLINKS; - if (xflags & XFS_XFLAG_EXTSZINHERIT) - di_flags |= XFS_DIFLAG_EXTSZINHERIT; - } else if (S_ISREG(ip->i_d.di_mode)) { - if (xflags & XFS_XFLAG_REALTIME) - di_flags |= XFS_DIFLAG_REALTIME; - if (xflags & XFS_XFLAG_EXTSIZE) - di_flags |= XFS_DIFLAG_EXTSIZE; - } - - ip->i_d.di_flags = di_flags; -} - -STATIC void -xfs_diflags_to_linux( - struct xfs_inode *ip) -{ - struct inode *inode = VFS_I(ip); - unsigned int xflags = xfs_ip2xflags(ip); - - if (xflags & XFS_XFLAG_IMMUTABLE) - inode->i_flags |= S_IMMUTABLE; - else - inode->i_flags &= ~S_IMMUTABLE; - if (xflags & XFS_XFLAG_APPEND) - inode->i_flags |= S_APPEND; - else - inode->i_flags &= ~S_APPEND; - if (xflags & XFS_XFLAG_SYNC) - inode->i_flags |= S_SYNC; - else - inode->i_flags &= ~S_SYNC; - if (xflags & XFS_XFLAG_NOATIME) - inode->i_flags |= S_NOATIME; - else - inode->i_flags &= ~S_NOATIME; -} - -#define FSX_PROJID 1 -#define FSX_EXTSIZE 2 -#define FSX_XFLAGS 4 -#define FSX_NONBLOCK 8 - -STATIC int -xfs_ioctl_setattr( - xfs_inode_t *ip, - struct fsxattr *fa, - int mask) -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_trans *tp; - unsigned int lock_flags = 0; - struct xfs_dquot *udqp = NULL; - struct xfs_dquot *gdqp = NULL; - struct xfs_dquot *olddquot = NULL; - int code; - - trace_xfs_ioctl_setattr(ip); - - if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); - - /* - * Disallow 32bit project ids when projid32bit feature is not enabled. - */ - if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) && - !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) - return XFS_ERROR(EINVAL); - - /* - * If disk quotas is on, we make sure that the dquots do exist on disk, - * before we start any other transactions. Trying to do this later - * is messy. We don't care to take a readlock to look at the ids - * in inode here, because we can't hold it across the trans_reserve. - * If the IDs do change before we take the ilock, we're covered - * because the i_*dquot fields will get updated anyway. - */ - if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) { - code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, - ip->i_d.di_gid, fa->fsx_projid, - XFS_QMOPT_PQUOTA, &udqp, &gdqp); - if (code) - return code; - } - - /* - * For the other attributes, we acquire the inode lock and - * first do an error checking pass. - */ - tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); - code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); - if (code) - goto error_return; - - lock_flags = XFS_ILOCK_EXCL; - xfs_ilock(ip, lock_flags); - - /* - * CAP_FOWNER overrides the following restrictions: - * - * The user ID of the calling process must be equal - * to the file owner ID, except in cases where the - * CAP_FSETID capability is applicable. - */ - if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) { - code = XFS_ERROR(EPERM); - goto error_return; - } - - /* - * Do a quota reservation only if projid is actually going to change. - */ - if (mask & FSX_PROJID) { - if (XFS_IS_QUOTA_RUNNING(mp) && - XFS_IS_PQUOTA_ON(mp) && - xfs_get_projid(ip) != fa->fsx_projid) { - ASSERT(tp); - code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, - capable(CAP_FOWNER) ? - XFS_QMOPT_FORCE_RES : 0); - if (code) /* out of quota */ - goto error_return; - } - } - - if (mask & FSX_EXTSIZE) { - /* - * Can't change extent size if any extents are allocated. - */ - if (ip->i_d.di_nextents && - ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != - fa->fsx_extsize)) { - code = XFS_ERROR(EINVAL); /* EFBIG? */ - goto error_return; - } - - /* - * Extent size must be a multiple of the appropriate block - * size, if set at all. It must also be smaller than the - * maximum extent size supported by the filesystem. - * - * Also, for non-realtime files, limit the extent size hint to - * half the size of the AGs in the filesystem so alignment - * doesn't result in extents larger than an AG. - */ - if (fa->fsx_extsize != 0) { - xfs_extlen_t size; - xfs_fsblock_t extsize_fsb; - - extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); - if (extsize_fsb > MAXEXTLEN) { - code = XFS_ERROR(EINVAL); - goto error_return; - } - - if (XFS_IS_REALTIME_INODE(ip) || - ((mask & FSX_XFLAGS) && - (fa->fsx_xflags & XFS_XFLAG_REALTIME))) { - size = mp->m_sb.sb_rextsize << - mp->m_sb.sb_blocklog; - } else { - size = mp->m_sb.sb_blocksize; - if (extsize_fsb > mp->m_sb.sb_agblocks / 2) { - code = XFS_ERROR(EINVAL); - goto error_return; - } - } - - if (fa->fsx_extsize % size) { - code = XFS_ERROR(EINVAL); - goto error_return; - } - } - } - - - if (mask & FSX_XFLAGS) { - /* - * Can't change realtime flag if any extents are allocated. - */ - if ((ip->i_d.di_nextents || ip->i_delayed_blks) && - (XFS_IS_REALTIME_INODE(ip)) != - (fa->fsx_xflags & XFS_XFLAG_REALTIME)) { - code = XFS_ERROR(EINVAL); /* EFBIG? */ - goto error_return; - } - - /* - * If realtime flag is set then must have realtime data. - */ - if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) { - if ((mp->m_sb.sb_rblocks == 0) || - (mp->m_sb.sb_rextsize == 0) || - (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) { - code = XFS_ERROR(EINVAL); - goto error_return; - } - } - - /* - * Can't modify an immutable/append-only file unless - * we have appropriate permission. - */ - if ((ip->i_d.di_flags & - (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) || - (fa->fsx_xflags & - (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && - !capable(CAP_LINUX_IMMUTABLE)) { - code = XFS_ERROR(EPERM); - goto error_return; - } - } - - xfs_trans_ijoin(tp, ip); - - /* - * Change file ownership. Must be the owner or privileged. - */ - if (mask & FSX_PROJID) { - /* - * CAP_FSETID overrides the following restrictions: - * - * The set-user-ID and set-group-ID bits of a file will be - * cleared upon successful return from chown() - */ - if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && - !capable(CAP_FSETID)) - ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); - - /* - * Change the ownerships and register quota modifications - * in the transaction. - */ - if (xfs_get_projid(ip) != fa->fsx_projid) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { - olddquot = xfs_qm_vop_chown(tp, ip, - &ip->i_gdquot, gdqp); - } - xfs_set_projid(ip, fa->fsx_projid); - - /* - * We may have to rev the inode as well as - * the superblock version number since projids didn't - * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. - */ - if (ip->i_d.di_version == 1) - xfs_bump_ino_vers2(tp, ip); - } - - } - - if (mask & FSX_EXTSIZE) - ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; - if (mask & FSX_XFLAGS) { - xfs_set_diflags(ip, fa->fsx_xflags); - xfs_diflags_to_linux(ip); - } - - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - XFS_STATS_INC(xs_ig_attrchg); - - /* - * If this is a synchronous mount, make sure that the - * transaction goes to disk before returning to the user. - * This is slightly sub-optimal in that truncates require - * two sync transactions instead of one for wsync filesystems. - * One for the truncate and one for the timestamps since we - * don't want to change the timestamps unless we're sure the - * truncate worked. Truncates are less than 1% of the laddis - * mix so this probably isn't worth the trouble to optimize. - */ - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(tp); - code = xfs_trans_commit(tp, 0); - xfs_iunlock(ip, lock_flags); - - /* - * Release any dquot(s) the inode had kept before chown. - */ - xfs_qm_dqrele(olddquot); - xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); - - return code; - - error_return: - xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); - xfs_trans_cancel(tp, 0); - if (lock_flags) - xfs_iunlock(ip, lock_flags); - return code; -} - -STATIC int -xfs_ioc_fssetxattr( - xfs_inode_t *ip, - struct file *filp, - void __user *arg) -{ - struct fsxattr fa; - unsigned int mask; - - if (copy_from_user(&fa, arg, sizeof(fa))) - return -EFAULT; - - mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID; - if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) - mask |= FSX_NONBLOCK; - - return -xfs_ioctl_setattr(ip, &fa, mask); -} - -STATIC int -xfs_ioc_getxflags( - xfs_inode_t *ip, - void __user *arg) -{ - unsigned int flags; - - flags = xfs_di2lxflags(ip->i_d.di_flags); - if (copy_to_user(arg, &flags, sizeof(flags))) - return -EFAULT; - return 0; -} - -STATIC int -xfs_ioc_setxflags( - xfs_inode_t *ip, - struct file *filp, - void __user *arg) -{ - struct fsxattr fa; - unsigned int flags; - unsigned int mask; - - if (copy_from_user(&flags, arg, sizeof(flags))) - return -EFAULT; - - if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ - FS_NOATIME_FL | FS_NODUMP_FL | \ - FS_SYNC_FL)) - return -EOPNOTSUPP; - - mask = FSX_XFLAGS; - if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) - mask |= FSX_NONBLOCK; - fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); - - return -xfs_ioctl_setattr(ip, &fa, mask); -} - -STATIC int -xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full) -{ - struct getbmap __user *base = *ap; - - /* copy only getbmap portion (not getbmapx) */ - if (copy_to_user(base, bmv, sizeof(struct getbmap))) - return XFS_ERROR(EFAULT); - - *ap += sizeof(struct getbmap); - return 0; -} - -STATIC int -xfs_ioc_getbmap( - struct xfs_inode *ip, - int ioflags, - unsigned int cmd, - void __user *arg) -{ - struct getbmapx bmx; - int error; - - if (copy_from_user(&bmx, arg, sizeof(struct getbmapx))) - return -XFS_ERROR(EFAULT); - - if (bmx.bmv_count < 2) - return -XFS_ERROR(EINVAL); - - bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); - if (ioflags & IO_INVIS) - bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ; - - error = xfs_getbmap(ip, &bmx, xfs_getbmap_format, - (struct getbmap *)arg+1); - if (error) - return -error; - - /* copy back header - only size of getbmap */ - if (copy_to_user(arg, &bmx, sizeof(struct getbmap))) - return -XFS_ERROR(EFAULT); - return 0; -} - -STATIC int -xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full) -{ - struct getbmapx __user *base = *ap; - - if (copy_to_user(base, bmv, sizeof(struct getbmapx))) - return XFS_ERROR(EFAULT); - - *ap += sizeof(struct getbmapx); - return 0; -} - -STATIC int -xfs_ioc_getbmapx( - struct xfs_inode *ip, - void __user *arg) -{ - struct getbmapx bmx; - int error; - - if (copy_from_user(&bmx, arg, sizeof(bmx))) - return -XFS_ERROR(EFAULT); - - if (bmx.bmv_count < 2) - return -XFS_ERROR(EINVAL); - - if (bmx.bmv_iflags & (~BMV_IF_VALID)) - return -XFS_ERROR(EINVAL); - - error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format, - (struct getbmapx *)arg+1); - if (error) - return -error; - - /* copy back header */ - if (copy_to_user(arg, &bmx, sizeof(struct getbmapx))) - return -XFS_ERROR(EFAULT); - - return 0; -} - -/* - * Note: some of the ioctl's return positive numbers as a - * byte count indicating success, such as readlink_by_handle. - * So we don't "sign flip" like most other routines. This means - * true errors need to be returned as a negative value. - */ -long -xfs_file_ioctl( - struct file *filp, - unsigned int cmd, - unsigned long p) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - void __user *arg = (void __user *)p; - int ioflags = 0; - int error; - - if (filp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; - - trace_xfs_file_ioctl(ip); - - switch (cmd) { - case FITRIM: - return xfs_ioc_trim(mp, arg); - case XFS_IOC_ALLOCSP: - case XFS_IOC_FREESP: - case XFS_IOC_RESVSP: - case XFS_IOC_UNRESVSP: - case XFS_IOC_ALLOCSP64: - case XFS_IOC_FREESP64: - case XFS_IOC_RESVSP64: - case XFS_IOC_UNRESVSP64: - case XFS_IOC_ZERO_RANGE: { - xfs_flock64_t bf; - - if (copy_from_user(&bf, arg, sizeof(bf))) - return -XFS_ERROR(EFAULT); - return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); - } - case XFS_IOC_DIOINFO: { - struct dioattr da; - xfs_buftarg_t *target = - XFS_IS_REALTIME_INODE(ip) ? - mp->m_rtdev_targp : mp->m_ddev_targp; - - da.d_mem = da.d_miniosz = 1 << target->bt_sshift; - da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); - - if (copy_to_user(arg, &da, sizeof(da))) - return -XFS_ERROR(EFAULT); - return 0; - } - - case XFS_IOC_FSBULKSTAT_SINGLE: - case XFS_IOC_FSBULKSTAT: - case XFS_IOC_FSINUMBERS: - return xfs_ioc_bulkstat(mp, cmd, arg); - - case XFS_IOC_FSGEOMETRY_V1: - return xfs_ioc_fsgeometry_v1(mp, arg); - - case XFS_IOC_FSGEOMETRY: - return xfs_ioc_fsgeometry(mp, arg); - - case XFS_IOC_GETVERSION: - return put_user(inode->i_generation, (int __user *)arg); - - case XFS_IOC_FSGETXATTR: - return xfs_ioc_fsgetxattr(ip, 0, arg); - case XFS_IOC_FSGETXATTRA: - return xfs_ioc_fsgetxattr(ip, 1, arg); - case XFS_IOC_FSSETXATTR: - return xfs_ioc_fssetxattr(ip, filp, arg); - case XFS_IOC_GETXFLAGS: - return xfs_ioc_getxflags(ip, arg); - case XFS_IOC_SETXFLAGS: - return xfs_ioc_setxflags(ip, filp, arg); - - case XFS_IOC_FSSETDM: { - struct fsdmidata dmi; - - if (copy_from_user(&dmi, arg, sizeof(dmi))) - return -XFS_ERROR(EFAULT); - - error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, - dmi.fsd_dmstate); - return -error; - } - - case XFS_IOC_GETBMAP: - case XFS_IOC_GETBMAPA: - return xfs_ioc_getbmap(ip, ioflags, cmd, arg); - - case XFS_IOC_GETBMAPX: - return xfs_ioc_getbmapx(ip, arg); - - case XFS_IOC_FD_TO_HANDLE: - case XFS_IOC_PATH_TO_HANDLE: - case XFS_IOC_PATH_TO_FSHANDLE: { - xfs_fsop_handlereq_t hreq; - - if (copy_from_user(&hreq, arg, sizeof(hreq))) - return -XFS_ERROR(EFAULT); - return xfs_find_handle(cmd, &hreq); - } - case XFS_IOC_OPEN_BY_HANDLE: { - xfs_fsop_handlereq_t hreq; - - if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) - return -XFS_ERROR(EFAULT); - return xfs_open_by_handle(filp, &hreq); - } - case XFS_IOC_FSSETDM_BY_HANDLE: - return xfs_fssetdm_by_handle(filp, arg); - - case XFS_IOC_READLINK_BY_HANDLE: { - xfs_fsop_handlereq_t hreq; - - if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) - return -XFS_ERROR(EFAULT); - return xfs_readlink_by_handle(filp, &hreq); - } - case XFS_IOC_ATTRLIST_BY_HANDLE: - return xfs_attrlist_by_handle(filp, arg); - - case XFS_IOC_ATTRMULTI_BY_HANDLE: - return xfs_attrmulti_by_handle(filp, arg); - - case XFS_IOC_SWAPEXT: { - struct xfs_swapext sxp; - - if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t))) - return -XFS_ERROR(EFAULT); - error = xfs_swapext(&sxp); - return -error; - } - - case XFS_IOC_FSCOUNTS: { - xfs_fsop_counts_t out; - - error = xfs_fs_counts(mp, &out); - if (error) - return -error; - - if (copy_to_user(arg, &out, sizeof(out))) - return -XFS_ERROR(EFAULT); - return 0; - } - - case XFS_IOC_SET_RESBLKS: { - xfs_fsop_resblks_t inout; - __uint64_t in; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (mp->m_flags & XFS_MOUNT_RDONLY) - return -XFS_ERROR(EROFS); - - if (copy_from_user(&inout, arg, sizeof(inout))) - return -XFS_ERROR(EFAULT); - - /* input parameter is passed in resblks field of structure */ - in = inout.resblks; - error = xfs_reserve_blocks(mp, &in, &inout); - if (error) - return -error; - - if (copy_to_user(arg, &inout, sizeof(inout))) - return -XFS_ERROR(EFAULT); - return 0; - } - - case XFS_IOC_GET_RESBLKS: { - xfs_fsop_resblks_t out; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - error = xfs_reserve_blocks(mp, NULL, &out); - if (error) - return -error; - - if (copy_to_user(arg, &out, sizeof(out))) - return -XFS_ERROR(EFAULT); - - return 0; - } - - case XFS_IOC_FSGROWFSDATA: { - xfs_growfs_data_t in; - - if (copy_from_user(&in, arg, sizeof(in))) - return -XFS_ERROR(EFAULT); - - error = xfs_growfs_data(mp, &in); - return -error; - } - - case XFS_IOC_FSGROWFSLOG: { - xfs_growfs_log_t in; - - if (copy_from_user(&in, arg, sizeof(in))) - return -XFS_ERROR(EFAULT); - - error = xfs_growfs_log(mp, &in); - return -error; - } - - case XFS_IOC_FSGROWFSRT: { - xfs_growfs_rt_t in; - - if (copy_from_user(&in, arg, sizeof(in))) - return -XFS_ERROR(EFAULT); - - error = xfs_growfs_rt(mp, &in); - return -error; - } - - case XFS_IOC_GOINGDOWN: { - __uint32_t in; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (get_user(in, (__uint32_t __user *)arg)) - return -XFS_ERROR(EFAULT); - - error = xfs_fs_goingdown(mp, in); - return -error; - } - - case XFS_IOC_ERROR_INJECTION: { - xfs_error_injection_t in; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (copy_from_user(&in, arg, sizeof(in))) - return -XFS_ERROR(EFAULT); - - error = xfs_errortag_add(in.errtag, mp); - return -error; - } - - case XFS_IOC_ERROR_CLEARALL: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - error = xfs_errortag_clearall(mp, 1); - return -error; - - default: - return -ENOTTY; - } -} diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h deleted file mode 100644 index d56173b..0000000 --- a/fs/xfs/linux-2.6/xfs_ioctl.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2008 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_IOCTL_H__ -#define __XFS_IOCTL_H__ - -extern int -xfs_ioc_space( - struct xfs_inode *ip, - struct inode *inode, - struct file *filp, - int ioflags, - unsigned int cmd, - xfs_flock64_t *bf); - -extern int -xfs_find_handle( - unsigned int cmd, - xfs_fsop_handlereq_t *hreq); - -extern int -xfs_open_by_handle( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq); - -extern int -xfs_readlink_by_handle( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq); - -extern int -xfs_attrmulti_attr_get( - struct inode *inode, - unsigned char *name, - unsigned char __user *ubuf, - __uint32_t *len, - __uint32_t flags); - -extern int -xfs_attrmulti_attr_set( - struct inode *inode, - unsigned char *name, - const unsigned char __user *ubuf, - __uint32_t len, - __uint32_t flags); - -extern int -xfs_attrmulti_attr_remove( - struct inode *inode, - unsigned char *name, - __uint32_t flags); - -extern struct dentry * -xfs_handle_to_dentry( - struct file *parfilp, - void __user *uhandle, - u32 hlen); - -extern long -xfs_file_ioctl( - struct file *filp, - unsigned int cmd, - unsigned long p); - -extern long -xfs_file_compat_ioctl( - struct file *file, - unsigned int cmd, - unsigned long arg); - -#endif diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c deleted file mode 100644 index 54e623b..0000000 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ /dev/null @@ -1,672 +0,0 @@ -/* - * Copyright (c) 2004-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include -#include -#include -#include -#include -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_vnode.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_itable.h" -#include "xfs_error.h" -#include "xfs_dfrag.h" -#include "xfs_vnodeops.h" -#include "xfs_fsops.h" -#include "xfs_alloc.h" -#include "xfs_rtalloc.h" -#include "xfs_attr.h" -#include "xfs_ioctl.h" -#include "xfs_ioctl32.h" -#include "xfs_trace.h" - -#define _NATIVE_IOC(cmd, type) \ - _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) - -#ifdef BROKEN_X86_ALIGNMENT -STATIC int -xfs_compat_flock64_copyin( - xfs_flock64_t *bf, - compat_xfs_flock64_t __user *arg32) -{ - if (get_user(bf->l_type, &arg32->l_type) || - get_user(bf->l_whence, &arg32->l_whence) || - get_user(bf->l_start, &arg32->l_start) || - get_user(bf->l_len, &arg32->l_len) || - get_user(bf->l_sysid, &arg32->l_sysid) || - get_user(bf->l_pid, &arg32->l_pid) || - copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32))) - return -XFS_ERROR(EFAULT); - return 0; -} - -STATIC int -xfs_compat_ioc_fsgeometry_v1( - struct xfs_mount *mp, - compat_xfs_fsop_geom_v1_t __user *arg32) -{ - xfs_fsop_geom_t fsgeo; - int error; - - error = xfs_fs_geometry(mp, &fsgeo, 3); - if (error) - return -error; - /* The 32-bit variant simply has some padding at the end */ - if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1))) - return -XFS_ERROR(EFAULT); - return 0; -} - -STATIC int -xfs_compat_growfs_data_copyin( - struct xfs_growfs_data *in, - compat_xfs_growfs_data_t __user *arg32) -{ - if (get_user(in->newblocks, &arg32->newblocks) || - get_user(in->imaxpct, &arg32->imaxpct)) - return -XFS_ERROR(EFAULT); - return 0; -} - -STATIC int -xfs_compat_growfs_rt_copyin( - struct xfs_growfs_rt *in, - compat_xfs_growfs_rt_t __user *arg32) -{ - if (get_user(in->newblocks, &arg32->newblocks) || - get_user(in->extsize, &arg32->extsize)) - return -XFS_ERROR(EFAULT); - return 0; -} - -STATIC int -xfs_inumbers_fmt_compat( - void __user *ubuffer, - const xfs_inogrp_t *buffer, - long count, - long *written) -{ - compat_xfs_inogrp_t __user *p32 = ubuffer; - long i; - - for (i = 0; i < count; i++) { - if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) || - put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) || - put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask)) - return -XFS_ERROR(EFAULT); - } - *written = count * sizeof(*p32); - return 0; -} - -#else -#define xfs_inumbers_fmt_compat xfs_inumbers_fmt -#endif /* BROKEN_X86_ALIGNMENT */ - -STATIC int -xfs_ioctl32_bstime_copyin( - xfs_bstime_t *bstime, - compat_xfs_bstime_t __user *bstime32) -{ - compat_time_t sec32; /* tv_sec differs on 64 vs. 32 */ - - if (get_user(sec32, &bstime32->tv_sec) || - get_user(bstime->tv_nsec, &bstime32->tv_nsec)) - return -XFS_ERROR(EFAULT); - bstime->tv_sec = sec32; - return 0; -} - -/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */ -STATIC int -xfs_ioctl32_bstat_copyin( - xfs_bstat_t *bstat, - compat_xfs_bstat_t __user *bstat32) -{ - if (get_user(bstat->bs_ino, &bstat32->bs_ino) || - get_user(bstat->bs_mode, &bstat32->bs_mode) || - get_user(bstat->bs_nlink, &bstat32->bs_nlink) || - get_user(bstat->bs_uid, &bstat32->bs_uid) || - get_user(bstat->bs_gid, &bstat32->bs_gid) || - get_user(bstat->bs_rdev, &bstat32->bs_rdev) || - get_user(bstat->bs_blksize, &bstat32->bs_blksize) || - get_user(bstat->bs_size, &bstat32->bs_size) || - xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) || - xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) || - xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) || - get_user(bstat->bs_blocks, &bstat32->bs_size) || - get_user(bstat->bs_xflags, &bstat32->bs_size) || - get_user(bstat->bs_extsize, &bstat32->bs_extsize) || - get_user(bstat->bs_extents, &bstat32->bs_extents) || - get_user(bstat->bs_gen, &bstat32->bs_gen) || - get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) || - get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) || - get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || - get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || - get_user(bstat->bs_aextents, &bstat32->bs_aextents)) - return -XFS_ERROR(EFAULT); - return 0; -} - -/* XFS_IOC_FSBULKSTAT and friends */ - -STATIC int -xfs_bstime_store_compat( - compat_xfs_bstime_t __user *p32, - const xfs_bstime_t *p) -{ - __s32 sec32; - - sec32 = p->tv_sec; - if (put_user(sec32, &p32->tv_sec) || - put_user(p->tv_nsec, &p32->tv_nsec)) - return -XFS_ERROR(EFAULT); - return 0; -} - -/* Return 0 on success or positive error (to xfs_bulkstat()) */ -STATIC int -xfs_bulkstat_one_fmt_compat( - void __user *ubuffer, - int ubsize, - int *ubused, - const xfs_bstat_t *buffer) -{ - compat_xfs_bstat_t __user *p32 = ubuffer; - - if (ubsize < sizeof(*p32)) - return XFS_ERROR(ENOMEM); - - if (put_user(buffer->bs_ino, &p32->bs_ino) || - put_user(buffer->bs_mode, &p32->bs_mode) || - put_user(buffer->bs_nlink, &p32->bs_nlink) || - put_user(buffer->bs_uid, &p32->bs_uid) || - put_user(buffer->bs_gid, &p32->bs_gid) || - put_user(buffer->bs_rdev, &p32->bs_rdev) || - put_user(buffer->bs_blksize, &p32->bs_blksize) || - put_user(buffer->bs_size, &p32->bs_size) || - xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) || - xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) || - xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) || - put_user(buffer->bs_blocks, &p32->bs_blocks) || - put_user(buffer->bs_xflags, &p32->bs_xflags) || - put_user(buffer->bs_extsize, &p32->bs_extsize) || - put_user(buffer->bs_extents, &p32->bs_extents) || - put_user(buffer->bs_gen, &p32->bs_gen) || - put_user(buffer->bs_projid, &p32->bs_projid) || - put_user(buffer->bs_projid_hi, &p32->bs_projid_hi) || - put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || - put_user(buffer->bs_dmstate, &p32->bs_dmstate) || - put_user(buffer->bs_aextents, &p32->bs_aextents)) - return XFS_ERROR(EFAULT); - if (ubused) - *ubused = sizeof(*p32); - return 0; -} - -STATIC int -xfs_bulkstat_one_compat( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode number to get data for */ - void __user *buffer, /* buffer to place output in */ - int ubsize, /* size of buffer */ - int *ubused, /* bytes used by me */ - int *stat) /* BULKSTAT_RV_... */ -{ - return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, - xfs_bulkstat_one_fmt_compat, - ubused, stat); -} - -/* copied from xfs_ioctl.c */ -STATIC int -xfs_compat_ioc_bulkstat( - xfs_mount_t *mp, - unsigned int cmd, - compat_xfs_fsop_bulkreq_t __user *p32) -{ - u32 addr; - xfs_fsop_bulkreq_t bulkreq; - int count; /* # of records returned */ - xfs_ino_t inlast; /* last inode number */ - int done; - int error; - - /* done = 1 if there are more stats to get and if bulkstat */ - /* should be called again (unused here, but used in dmapi) */ - - if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); - - if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); - - if (get_user(addr, &p32->lastip)) - return -XFS_ERROR(EFAULT); - bulkreq.lastip = compat_ptr(addr); - if (get_user(bulkreq.icount, &p32->icount) || - get_user(addr, &p32->ubuffer)) - return -XFS_ERROR(EFAULT); - bulkreq.ubuffer = compat_ptr(addr); - if (get_user(addr, &p32->ocount)) - return -XFS_ERROR(EFAULT); - bulkreq.ocount = compat_ptr(addr); - - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) - return -XFS_ERROR(EFAULT); - - if ((count = bulkreq.icount) <= 0) - return -XFS_ERROR(EINVAL); - - if (bulkreq.ubuffer == NULL) - return -XFS_ERROR(EINVAL); - - if (cmd == XFS_IOC_FSINUMBERS_32) { - error = xfs_inumbers(mp, &inlast, &count, - bulkreq.ubuffer, xfs_inumbers_fmt_compat); - } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { - int res; - - error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, - sizeof(compat_xfs_bstat_t), 0, &res); - } else if (cmd == XFS_IOC_FSBULKSTAT_32) { - error = xfs_bulkstat(mp, &inlast, &count, - xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t), - bulkreq.ubuffer, &done); - } else - error = XFS_ERROR(EINVAL); - if (error) - return -error; - - if (bulkreq.ocount != NULL) { - if (copy_to_user(bulkreq.lastip, &inlast, - sizeof(xfs_ino_t))) - return -XFS_ERROR(EFAULT); - - if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) - return -XFS_ERROR(EFAULT); - } - - return 0; -} - -STATIC int -xfs_compat_handlereq_copyin( - xfs_fsop_handlereq_t *hreq, - compat_xfs_fsop_handlereq_t __user *arg32) -{ - compat_xfs_fsop_handlereq_t hreq32; - - if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t))) - return -XFS_ERROR(EFAULT); - - hreq->fd = hreq32.fd; - hreq->path = compat_ptr(hreq32.path); - hreq->oflags = hreq32.oflags; - hreq->ihandle = compat_ptr(hreq32.ihandle); - hreq->ihandlen = hreq32.ihandlen; - hreq->ohandle = compat_ptr(hreq32.ohandle); - hreq->ohandlen = compat_ptr(hreq32.ohandlen); - - return 0; -} - -STATIC struct dentry * -xfs_compat_handlereq_to_dentry( - struct file *parfilp, - compat_xfs_fsop_handlereq_t *hreq) -{ - return xfs_handle_to_dentry(parfilp, - compat_ptr(hreq->ihandle), hreq->ihandlen); -} - -STATIC int -xfs_compat_attrlist_by_handle( - struct file *parfilp, - void __user *arg) -{ - int error; - attrlist_cursor_kern_t *cursor; - compat_xfs_fsop_attrlist_handlereq_t al_hreq; - struct dentry *dentry; - char *kbuf; - - if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); - if (copy_from_user(&al_hreq, arg, - sizeof(compat_xfs_fsop_attrlist_handlereq_t))) - return -XFS_ERROR(EFAULT); - if (al_hreq.buflen > XATTR_LIST_MAX) - return -XFS_ERROR(EINVAL); - - /* - * Reject flags, only allow namespaces. - */ - if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) - return -XFS_ERROR(EINVAL); - - dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - error = -ENOMEM; - kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL); - if (!kbuf) - goto out_dput; - - cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; - error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, - al_hreq.flags, cursor); - if (error) - goto out_kfree; - - if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen)) - error = -EFAULT; - - out_kfree: - kfree(kbuf); - out_dput: - dput(dentry); - return error; -} - -STATIC int -xfs_compat_attrmulti_by_handle( - struct file *parfilp, - void __user *arg) -{ - int error; - compat_xfs_attr_multiop_t *ops; - compat_xfs_fsop_attrmulti_handlereq_t am_hreq; - struct dentry *dentry; - unsigned int i, size; - unsigned char *attr_name; - - if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); - if (copy_from_user(&am_hreq, arg, - sizeof(compat_xfs_fsop_attrmulti_handlereq_t))) - return -XFS_ERROR(EFAULT); - - /* overflow check */ - if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t)) - return -E2BIG; - - dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - error = E2BIG; - size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t); - if (!size || size > 16 * PAGE_SIZE) - goto out_dput; - - ops = memdup_user(compat_ptr(am_hreq.ops), size); - if (IS_ERR(ops)) { - error = PTR_ERR(ops); - goto out_dput; - } - - attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); - if (!attr_name) - goto out_kfree_ops; - - error = 0; - for (i = 0; i < am_hreq.opcount; i++) { - ops[i].am_error = strncpy_from_user((char *)attr_name, - compat_ptr(ops[i].am_attrname), - MAXNAMELEN); - if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) - error = -ERANGE; - if (ops[i].am_error < 0) - break; - - switch (ops[i].am_opcode) { - case ATTR_OP_GET: - ops[i].am_error = xfs_attrmulti_attr_get( - dentry->d_inode, attr_name, - compat_ptr(ops[i].am_attrvalue), - &ops[i].am_length, ops[i].am_flags); - break; - case ATTR_OP_SET: - ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); - if (ops[i].am_error) - break; - ops[i].am_error = xfs_attrmulti_attr_set( - dentry->d_inode, attr_name, - compat_ptr(ops[i].am_attrvalue), - ops[i].am_length, ops[i].am_flags); - mnt_drop_write(parfilp->f_path.mnt); - break; - case ATTR_OP_REMOVE: - ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); - if (ops[i].am_error) - break; - ops[i].am_error = xfs_attrmulti_attr_remove( - dentry->d_inode, attr_name, - ops[i].am_flags); - mnt_drop_write(parfilp->f_path.mnt); - break; - default: - ops[i].am_error = EINVAL; - } - } - - if (copy_to_user(compat_ptr(am_hreq.ops), ops, size)) - error = XFS_ERROR(EFAULT); - - kfree(attr_name); - out_kfree_ops: - kfree(ops); - out_dput: - dput(dentry); - return -error; -} - -STATIC int -xfs_compat_fssetdm_by_handle( - struct file *parfilp, - void __user *arg) -{ - int error; - struct fsdmidata fsd; - compat_xfs_fsop_setdm_handlereq_t dmhreq; - struct dentry *dentry; - - if (!capable(CAP_MKNOD)) - return -XFS_ERROR(EPERM); - if (copy_from_user(&dmhreq, arg, - sizeof(compat_xfs_fsop_setdm_handlereq_t))) - return -XFS_ERROR(EFAULT); - - dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { - error = -XFS_ERROR(EPERM); - goto out; - } - - if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) { - error = -XFS_ERROR(EFAULT); - goto out; - } - - error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, - fsd.fsd_dmstate); - -out: - dput(dentry); - return error; -} - -long -xfs_file_compat_ioctl( - struct file *filp, - unsigned cmd, - unsigned long p) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - void __user *arg = (void __user *)p; - int ioflags = 0; - int error; - - if (filp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; - - trace_xfs_file_compat_ioctl(ip); - - switch (cmd) { - /* No size or alignment issues on any arch */ - case XFS_IOC_DIOINFO: - case XFS_IOC_FSGEOMETRY: - case XFS_IOC_FSGETXATTR: - case XFS_IOC_FSSETXATTR: - case XFS_IOC_FSGETXATTRA: - case XFS_IOC_FSSETDM: - case XFS_IOC_GETBMAP: - case XFS_IOC_GETBMAPA: - case XFS_IOC_GETBMAPX: - case XFS_IOC_FSCOUNTS: - case XFS_IOC_SET_RESBLKS: - case XFS_IOC_GET_RESBLKS: - case XFS_IOC_FSGROWFSLOG: - case XFS_IOC_GOINGDOWN: - case XFS_IOC_ERROR_INJECTION: - case XFS_IOC_ERROR_CLEARALL: - return xfs_file_ioctl(filp, cmd, p); -#ifndef BROKEN_X86_ALIGNMENT - /* These are handled fine if no alignment issues */ - case XFS_IOC_ALLOCSP: - case XFS_IOC_FREESP: - case XFS_IOC_RESVSP: - case XFS_IOC_UNRESVSP: - case XFS_IOC_ALLOCSP64: - case XFS_IOC_FREESP64: - case XFS_IOC_RESVSP64: - case XFS_IOC_UNRESVSP64: - case XFS_IOC_FSGEOMETRY_V1: - case XFS_IOC_FSGROWFSDATA: - case XFS_IOC_FSGROWFSRT: - case XFS_IOC_ZERO_RANGE: - return xfs_file_ioctl(filp, cmd, p); -#else - case XFS_IOC_ALLOCSP_32: - case XFS_IOC_FREESP_32: - case XFS_IOC_ALLOCSP64_32: - case XFS_IOC_FREESP64_32: - case XFS_IOC_RESVSP_32: - case XFS_IOC_UNRESVSP_32: - case XFS_IOC_RESVSP64_32: - case XFS_IOC_UNRESVSP64_32: - case XFS_IOC_ZERO_RANGE_32: { - struct xfs_flock64 bf; - - if (xfs_compat_flock64_copyin(&bf, arg)) - return -XFS_ERROR(EFAULT); - cmd = _NATIVE_IOC(cmd, struct xfs_flock64); - return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); - } - case XFS_IOC_FSGEOMETRY_V1_32: - return xfs_compat_ioc_fsgeometry_v1(mp, arg); - case XFS_IOC_FSGROWFSDATA_32: { - struct xfs_growfs_data in; - - if (xfs_compat_growfs_data_copyin(&in, arg)) - return -XFS_ERROR(EFAULT); - error = xfs_growfs_data(mp, &in); - return -error; - } - case XFS_IOC_FSGROWFSRT_32: { - struct xfs_growfs_rt in; - - if (xfs_compat_growfs_rt_copyin(&in, arg)) - return -XFS_ERROR(EFAULT); - error = xfs_growfs_rt(mp, &in); - return -error; - } -#endif - /* long changes size, but xfs only copiese out 32 bits */ - case XFS_IOC_GETXFLAGS_32: - case XFS_IOC_SETXFLAGS_32: - case XFS_IOC_GETVERSION_32: - cmd = _NATIVE_IOC(cmd, long); - return xfs_file_ioctl(filp, cmd, p); - case XFS_IOC_SWAPEXT_32: { - struct xfs_swapext sxp; - struct compat_xfs_swapext __user *sxu = arg; - - /* Bulk copy in up to the sx_stat field, then copy bstat */ - if (copy_from_user(&sxp, sxu, - offsetof(struct xfs_swapext, sx_stat)) || - xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) - return -XFS_ERROR(EFAULT); - error = xfs_swapext(&sxp); - return -error; - } - case XFS_IOC_FSBULKSTAT_32: - case XFS_IOC_FSBULKSTAT_SINGLE_32: - case XFS_IOC_FSINUMBERS_32: - return xfs_compat_ioc_bulkstat(mp, cmd, arg); - case XFS_IOC_FD_TO_HANDLE_32: - case XFS_IOC_PATH_TO_HANDLE_32: - case XFS_IOC_PATH_TO_FSHANDLE_32: { - struct xfs_fsop_handlereq hreq; - - if (xfs_compat_handlereq_copyin(&hreq, arg)) - return -XFS_ERROR(EFAULT); - cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq); - return xfs_find_handle(cmd, &hreq); - } - case XFS_IOC_OPEN_BY_HANDLE_32: { - struct xfs_fsop_handlereq hreq; - - if (xfs_compat_handlereq_copyin(&hreq, arg)) - return -XFS_ERROR(EFAULT); - return xfs_open_by_handle(filp, &hreq); - } - case XFS_IOC_READLINK_BY_HANDLE_32: { - struct xfs_fsop_handlereq hreq; - - if (xfs_compat_handlereq_copyin(&hreq, arg)) - return -XFS_ERROR(EFAULT); - return xfs_readlink_by_handle(filp, &hreq); - } - case XFS_IOC_ATTRLIST_BY_HANDLE_32: - return xfs_compat_attrlist_by_handle(filp, arg); - case XFS_IOC_ATTRMULTI_BY_HANDLE_32: - return xfs_compat_attrmulti_by_handle(filp, arg); - case XFS_IOC_FSSETDM_BY_HANDLE_32: - return xfs_compat_fssetdm_by_handle(filp, arg); - default: - return -XFS_ERROR(ENOIOCTLCMD); - } -} diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h deleted file mode 100644 index 80f4060..0000000 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Copyright (c) 2004-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_IOCTL32_H__ -#define __XFS_IOCTL32_H__ - -#include - -/* - * on 32-bit arches, ioctl argument structures may have different sizes - * and/or alignment. We define compat structures which match the - * 32-bit sizes/alignments here, and their associated ioctl numbers. - * - * xfs_ioctl32.c contains routines to copy these structures in and out. - */ - -/* stock kernel-level ioctls we support */ -#define XFS_IOC_GETXFLAGS_32 FS_IOC32_GETFLAGS -#define XFS_IOC_SETXFLAGS_32 FS_IOC32_SETFLAGS -#define XFS_IOC_GETVERSION_32 FS_IOC32_GETVERSION - -/* - * On intel, even if sizes match, alignment and/or padding may differ. - */ -#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) -#define BROKEN_X86_ALIGNMENT -#define __compat_packed __attribute__((packed)) -#else -#define __compat_packed -#endif - -typedef struct compat_xfs_bstime { - compat_time_t tv_sec; /* seconds */ - __s32 tv_nsec; /* and nanoseconds */ -} compat_xfs_bstime_t; - -typedef struct compat_xfs_bstat { - __u64 bs_ino; /* inode number */ - __u16 bs_mode; /* type and mode */ - __u16 bs_nlink; /* number of links */ - __u32 bs_uid; /* user id */ - __u32 bs_gid; /* group id */ - __u32 bs_rdev; /* device value */ - __s32 bs_blksize; /* block size */ - __s64 bs_size; /* file size */ - compat_xfs_bstime_t bs_atime; /* access time */ - compat_xfs_bstime_t bs_mtime; /* modify time */ - compat_xfs_bstime_t bs_ctime; /* inode change time */ - int64_t bs_blocks; /* number of blocks */ - __u32 bs_xflags; /* extended flags */ - __s32 bs_extsize; /* extent size */ - __s32 bs_extents; /* number of extents */ - __u32 bs_gen; /* generation count */ - __u16 bs_projid_lo; /* lower part of project id */ -#define bs_projid bs_projid_lo /* (previously just bs_projid) */ - __u16 bs_projid_hi; /* high part of project id */ - unsigned char bs_pad[12]; /* pad space, unused */ - __u32 bs_dmevmask; /* DMIG event mask */ - __u16 bs_dmstate; /* DMIG state info */ - __u16 bs_aextents; /* attribute number of extents */ -} __compat_packed compat_xfs_bstat_t; - -typedef struct compat_xfs_fsop_bulkreq { - compat_uptr_t lastip; /* last inode # pointer */ - __s32 icount; /* count of entries in buffer */ - compat_uptr_t ubuffer; /* user buffer for inode desc. */ - compat_uptr_t ocount; /* output count pointer */ -} compat_xfs_fsop_bulkreq_t; - -#define XFS_IOC_FSBULKSTAT_32 \ - _IOWR('X', 101, struct compat_xfs_fsop_bulkreq) -#define XFS_IOC_FSBULKSTAT_SINGLE_32 \ - _IOWR('X', 102, struct compat_xfs_fsop_bulkreq) -#define XFS_IOC_FSINUMBERS_32 \ - _IOWR('X', 103, struct compat_xfs_fsop_bulkreq) - -typedef struct compat_xfs_fsop_handlereq { - __u32 fd; /* fd for FD_TO_HANDLE */ - compat_uptr_t path; /* user pathname */ - __u32 oflags; /* open flags */ - compat_uptr_t ihandle; /* user supplied handle */ - __u32 ihandlen; /* user supplied length */ - compat_uptr_t ohandle; /* user buffer for handle */ - compat_uptr_t ohandlen; /* user buffer length */ -} compat_xfs_fsop_handlereq_t; - -#define XFS_IOC_PATH_TO_FSHANDLE_32 \ - _IOWR('X', 104, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_PATH_TO_HANDLE_32 \ - _IOWR('X', 105, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_FD_TO_HANDLE_32 \ - _IOWR('X', 106, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_OPEN_BY_HANDLE_32 \ - _IOWR('X', 107, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_READLINK_BY_HANDLE_32 \ - _IOWR('X', 108, struct compat_xfs_fsop_handlereq) - -/* The bstat field in the swapext struct needs translation */ -typedef struct compat_xfs_swapext { - __int64_t sx_version; /* version */ - __int64_t sx_fdtarget; /* fd of target file */ - __int64_t sx_fdtmp; /* fd of tmp file */ - xfs_off_t sx_offset; /* offset into file */ - xfs_off_t sx_length; /* leng from offset */ - char sx_pad[16]; /* pad space, unused */ - compat_xfs_bstat_t sx_stat; /* stat of target b4 copy */ -} __compat_packed compat_xfs_swapext_t; - -#define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext) - -typedef struct compat_xfs_fsop_attrlist_handlereq { - struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */ - struct xfs_attrlist_cursor pos; /* opaque cookie, list offset */ - __u32 flags; /* which namespace to use */ - __u32 buflen; /* length of buffer supplied */ - compat_uptr_t buffer; /* returned names */ -} __compat_packed compat_xfs_fsop_attrlist_handlereq_t; - -/* Note: actually this is read/write */ -#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \ - _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq) - -/* am_opcodes defined in xfs_fs.h */ -typedef struct compat_xfs_attr_multiop { - __u32 am_opcode; - __s32 am_error; - compat_uptr_t am_attrname; - compat_uptr_t am_attrvalue; - __u32 am_length; - __u32 am_flags; -} compat_xfs_attr_multiop_t; - -typedef struct compat_xfs_fsop_attrmulti_handlereq { - struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */ - __u32 opcount;/* count of following multiop */ - /* ptr to compat_xfs_attr_multiop */ - compat_uptr_t ops; /* attr_multi data */ -} compat_xfs_fsop_attrmulti_handlereq_t; - -#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \ - _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq) - -typedef struct compat_xfs_fsop_setdm_handlereq { - struct compat_xfs_fsop_handlereq hreq; /* handle information */ - /* ptr to struct fsdmidata */ - compat_uptr_t data; /* DMAPI data */ -} compat_xfs_fsop_setdm_handlereq_t; - -#define XFS_IOC_FSSETDM_BY_HANDLE_32 \ - _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq) - -#ifdef BROKEN_X86_ALIGNMENT -/* on ia32 l_start is on a 32-bit boundary */ -typedef struct compat_xfs_flock64 { - __s16 l_type; - __s16 l_whence; - __s64 l_start __attribute__((packed)); - /* len == 0 means until end of file */ - __s64 l_len __attribute__((packed)); - __s32 l_sysid; - __u32 l_pid; - __s32 l_pad[4]; /* reserve area */ -} compat_xfs_flock64_t; - -#define XFS_IOC_ALLOCSP_32 _IOW('X', 10, struct compat_xfs_flock64) -#define XFS_IOC_FREESP_32 _IOW('X', 11, struct compat_xfs_flock64) -#define XFS_IOC_ALLOCSP64_32 _IOW('X', 36, struct compat_xfs_flock64) -#define XFS_IOC_FREESP64_32 _IOW('X', 37, struct compat_xfs_flock64) -#define XFS_IOC_RESVSP_32 _IOW('X', 40, struct compat_xfs_flock64) -#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) -#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) -#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) -#define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64) - -typedef struct compat_xfs_fsop_geom_v1 { - __u32 blocksize; /* filesystem (data) block size */ - __u32 rtextsize; /* realtime extent size */ - __u32 agblocks; /* fsblocks in an AG */ - __u32 agcount; /* number of allocation groups */ - __u32 logblocks; /* fsblocks in the log */ - __u32 sectsize; /* (data) sector size, bytes */ - __u32 inodesize; /* inode size in bytes */ - __u32 imaxpct; /* max allowed inode space(%) */ - __u64 datablocks; /* fsblocks in data subvolume */ - __u64 rtblocks; /* fsblocks in realtime subvol */ - __u64 rtextents; /* rt extents in realtime subvol*/ - __u64 logstart; /* starting fsblock of the log */ - unsigned char uuid[16]; /* unique id of the filesystem */ - __u32 sunit; /* stripe unit, fsblocks */ - __u32 swidth; /* stripe width, fsblocks */ - __s32 version; /* structure version */ - __u32 flags; /* superblock version flags */ - __u32 logsectsize; /* log sector size, bytes */ - __u32 rtsectsize; /* realtime sector size, bytes */ - __u32 dirblocksize; /* directory block size, bytes */ -} __attribute__((packed)) compat_xfs_fsop_geom_v1_t; - -#define XFS_IOC_FSGEOMETRY_V1_32 \ - _IOR('X', 100, struct compat_xfs_fsop_geom_v1) - -typedef struct compat_xfs_inogrp { - __u64 xi_startino; /* starting inode number */ - __s32 xi_alloccount; /* # bits set in allocmask */ - __u64 xi_allocmask; /* mask of allocated inodes */ -} __attribute__((packed)) compat_xfs_inogrp_t; - -/* These growfs input structures have padding on the end, so must translate */ -typedef struct compat_xfs_growfs_data { - __u64 newblocks; /* new data subvol size, fsblocks */ - __u32 imaxpct; /* new inode space percentage limit */ -} __attribute__((packed)) compat_xfs_growfs_data_t; - -typedef struct compat_xfs_growfs_rt { - __u64 newblocks; /* new realtime size, fsblocks */ - __u32 extsize; /* new realtime extent size, fsblocks */ -} __attribute__((packed)) compat_xfs_growfs_rt_t; - -#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data) -#define XFS_IOC_FSGROWFSRT_32 _IOW('X', 112, struct compat_xfs_growfs_rt) - -#endif /* BROKEN_X86_ALIGNMENT */ - -#endif /* __XFS_IOCTL32_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c deleted file mode 100644 index b9c172b..0000000 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ /dev/null @@ -1,1210 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_acl.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_itable.h" -#include "xfs_rw.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_utils.h" -#include "xfs_vnodeops.h" -#include "xfs_inode_item.h" -#include "xfs_trace.h" - -#include -#include -#include -#include -#include -#include -#include - -/* - * Bring the timestamps in the XFS inode uptodate. - * - * Used before writing the inode to disk. - */ -void -xfs_synchronize_times( - xfs_inode_t *ip) -{ - struct inode *inode = VFS_I(ip); - - ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; - ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; - ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec; - ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec; -} - -/* - * If the linux inode is valid, mark it dirty. - * Used when committing a dirty inode into a transaction so that - * the inode will get written back by the linux code - */ -void -xfs_mark_inode_dirty_sync( - xfs_inode_t *ip) -{ - struct inode *inode = VFS_I(ip); - - if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) - mark_inode_dirty_sync(inode); -} - -void -xfs_mark_inode_dirty( - xfs_inode_t *ip) -{ - struct inode *inode = VFS_I(ip); - - if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) - mark_inode_dirty(inode); -} - -/* - * Hook in SELinux. This is not quite correct yet, what we really need - * here (as we do for default ACLs) is a mechanism by which creation of - * these attrs can be journalled at inode creation time (along with the - * inode, of course, such that log replay can't cause these to be lost). - */ -STATIC int -xfs_init_security( - struct inode *inode, - struct inode *dir, - const struct qstr *qstr) -{ - struct xfs_inode *ip = XFS_I(inode); - size_t length; - void *value; - unsigned char *name; - int error; - - error = security_inode_init_security(inode, dir, qstr, (char **)&name, - &value, &length); - if (error) { - if (error == -EOPNOTSUPP) - return 0; - return -error; - } - - error = xfs_attr_set(ip, name, value, length, ATTR_SECURE); - - kfree(name); - kfree(value); - return error; -} - -static void -xfs_dentry_to_name( - struct xfs_name *namep, - struct dentry *dentry) -{ - namep->name = dentry->d_name.name; - namep->len = dentry->d_name.len; -} - -STATIC void -xfs_cleanup_inode( - struct inode *dir, - struct inode *inode, - struct dentry *dentry) -{ - struct xfs_name teardown; - - /* Oh, the horror. - * If we can't add the ACL or we fail in - * xfs_init_security we must back out. - * ENOSPC can hit here, among other things. - */ - xfs_dentry_to_name(&teardown, dentry); - - xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); - iput(inode); -} - -STATIC int -xfs_vn_mknod( - struct inode *dir, - struct dentry *dentry, - int mode, - dev_t rdev) -{ - struct inode *inode; - struct xfs_inode *ip = NULL; - struct posix_acl *default_acl = NULL; - struct xfs_name name; - int error; - - /* - * Irix uses Missed'em'V split, but doesn't want to see - * the upper 5 bits of (14bit) major. - */ - if (S_ISCHR(mode) || S_ISBLK(mode)) { - if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) - return -EINVAL; - rdev = sysv_encode_dev(rdev); - } else { - rdev = 0; - } - - if (IS_POSIXACL(dir)) { - default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(default_acl)) - return PTR_ERR(default_acl); - - if (!default_acl) - mode &= ~current_umask(); - } - - xfs_dentry_to_name(&name, dentry); - error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); - if (unlikely(error)) - goto out_free_acl; - - inode = VFS_I(ip); - - error = xfs_init_security(inode, dir, &dentry->d_name); - if (unlikely(error)) - goto out_cleanup_inode; - - if (default_acl) { - error = -xfs_inherit_acl(inode, default_acl); - default_acl = NULL; - if (unlikely(error)) - goto out_cleanup_inode; - } - - - d_instantiate(dentry, inode); - return -error; - - out_cleanup_inode: - xfs_cleanup_inode(dir, inode, dentry); - out_free_acl: - posix_acl_release(default_acl); - return -error; -} - -STATIC int -xfs_vn_create( - struct inode *dir, - struct dentry *dentry, - int mode, - struct nameidata *nd) -{ - return xfs_vn_mknod(dir, dentry, mode, 0); -} - -STATIC int -xfs_vn_mkdir( - struct inode *dir, - struct dentry *dentry, - int mode) -{ - return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0); -} - -STATIC struct dentry * -xfs_vn_lookup( - struct inode *dir, - struct dentry *dentry, - struct nameidata *nd) -{ - struct xfs_inode *cip; - struct xfs_name name; - int error; - - if (dentry->d_name.len >= MAXNAMELEN) - return ERR_PTR(-ENAMETOOLONG); - - xfs_dentry_to_name(&name, dentry); - error = xfs_lookup(XFS_I(dir), &name, &cip, NULL); - if (unlikely(error)) { - if (unlikely(error != ENOENT)) - return ERR_PTR(-error); - d_add(dentry, NULL); - return NULL; - } - - return d_splice_alias(VFS_I(cip), dentry); -} - -STATIC struct dentry * -xfs_vn_ci_lookup( - struct inode *dir, - struct dentry *dentry, - struct nameidata *nd) -{ - struct xfs_inode *ip; - struct xfs_name xname; - struct xfs_name ci_name; - struct qstr dname; - int error; - - if (dentry->d_name.len >= MAXNAMELEN) - return ERR_PTR(-ENAMETOOLONG); - - xfs_dentry_to_name(&xname, dentry); - error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name); - if (unlikely(error)) { - if (unlikely(error != ENOENT)) - return ERR_PTR(-error); - /* - * call d_add(dentry, NULL) here when d_drop_negative_children - * is called in xfs_vn_mknod (ie. allow negative dentries - * with CI filesystems). - */ - return NULL; - } - - /* if exact match, just splice and exit */ - if (!ci_name.name) - return d_splice_alias(VFS_I(ip), dentry); - - /* else case-insensitive match... */ - dname.name = ci_name.name; - dname.len = ci_name.len; - dentry = d_add_ci(dentry, VFS_I(ip), &dname); - kmem_free(ci_name.name); - return dentry; -} - -STATIC int -xfs_vn_link( - struct dentry *old_dentry, - struct inode *dir, - struct dentry *dentry) -{ - struct inode *inode = old_dentry->d_inode; - struct xfs_name name; - int error; - - xfs_dentry_to_name(&name, dentry); - - error = xfs_link(XFS_I(dir), XFS_I(inode), &name); - if (unlikely(error)) - return -error; - - ihold(inode); - d_instantiate(dentry, inode); - return 0; -} - -STATIC int -xfs_vn_unlink( - struct inode *dir, - struct dentry *dentry) -{ - struct xfs_name name; - int error; - - xfs_dentry_to_name(&name, dentry); - - error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode)); - if (error) - return error; - - /* - * With unlink, the VFS makes the dentry "negative": no inode, - * but still hashed. This is incompatible with case-insensitive - * mode, so invalidate (unhash) the dentry in CI-mode. - */ - if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb)) - d_invalidate(dentry); - return 0; -} - -STATIC int -xfs_vn_symlink( - struct inode *dir, - struct dentry *dentry, - const char *symname) -{ - struct inode *inode; - struct xfs_inode *cip = NULL; - struct xfs_name name; - int error; - mode_t mode; - - mode = S_IFLNK | - (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); - xfs_dentry_to_name(&name, dentry); - - error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip); - if (unlikely(error)) - goto out; - - inode = VFS_I(cip); - - error = xfs_init_security(inode, dir, &dentry->d_name); - if (unlikely(error)) - goto out_cleanup_inode; - - d_instantiate(dentry, inode); - return 0; - - out_cleanup_inode: - xfs_cleanup_inode(dir, inode, dentry); - out: - return -error; -} - -STATIC int -xfs_vn_rename( - struct inode *odir, - struct dentry *odentry, - struct inode *ndir, - struct dentry *ndentry) -{ - struct inode *new_inode = ndentry->d_inode; - struct xfs_name oname; - struct xfs_name nname; - - xfs_dentry_to_name(&oname, odentry); - xfs_dentry_to_name(&nname, ndentry); - - return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), - XFS_I(ndir), &nname, new_inode ? - XFS_I(new_inode) : NULL); -} - -/* - * careful here - this function can get called recursively, so - * we need to be very careful about how much stack we use. - * uio is kmalloced for this reason... - */ -STATIC void * -xfs_vn_follow_link( - struct dentry *dentry, - struct nameidata *nd) -{ - char *link; - int error = -ENOMEM; - - link = kmalloc(MAXPATHLEN+1, GFP_KERNEL); - if (!link) - goto out_err; - - error = -xfs_readlink(XFS_I(dentry->d_inode), link); - if (unlikely(error)) - goto out_kfree; - - nd_set_link(nd, link); - return NULL; - - out_kfree: - kfree(link); - out_err: - nd_set_link(nd, ERR_PTR(error)); - return NULL; -} - -STATIC void -xfs_vn_put_link( - struct dentry *dentry, - struct nameidata *nd, - void *p) -{ - char *s = nd_get_link(nd); - - if (!IS_ERR(s)) - kfree(s); -} - -STATIC int -xfs_vn_getattr( - struct vfsmount *mnt, - struct dentry *dentry, - struct kstat *stat) -{ - struct inode *inode = dentry->d_inode; - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - - trace_xfs_getattr(ip); - - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); - - stat->size = XFS_ISIZE(ip); - stat->dev = inode->i_sb->s_dev; - stat->mode = ip->i_d.di_mode; - stat->nlink = ip->i_d.di_nlink; - stat->uid = ip->i_d.di_uid; - stat->gid = ip->i_d.di_gid; - stat->ino = ip->i_ino; - stat->atime = inode->i_atime; - stat->mtime = inode->i_mtime; - stat->ctime = inode->i_ctime; - stat->blocks = - XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); - - - switch (inode->i_mode & S_IFMT) { - case S_IFBLK: - case S_IFCHR: - stat->blksize = BLKDEV_IOSIZE; - stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, - sysv_minor(ip->i_df.if_u2.if_rdev)); - break; - default: - if (XFS_IS_REALTIME_INODE(ip)) { - /* - * If the file blocks are being allocated from a - * realtime volume, then return the inode's realtime - * extent size or the realtime volume's extent size. - */ - stat->blksize = - xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; - } else - stat->blksize = xfs_preferred_iosize(mp); - stat->rdev = 0; - break; - } - - return 0; -} - -int -xfs_setattr_nonsize( - struct xfs_inode *ip, - struct iattr *iattr, - int flags) -{ - xfs_mount_t *mp = ip->i_mount; - struct inode *inode = VFS_I(ip); - int mask = iattr->ia_valid; - xfs_trans_t *tp; - int error; - uid_t uid = 0, iuid = 0; - gid_t gid = 0, igid = 0; - struct xfs_dquot *udqp = NULL, *gdqp = NULL; - struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL; - - trace_xfs_setattr(ip); - - if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); - - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); - - error = -inode_change_ok(inode, iattr); - if (error) - return XFS_ERROR(error); - - ASSERT((mask & ATTR_SIZE) == 0); - - /* - * If disk quotas is on, we make sure that the dquots do exist on disk, - * before we start any other transactions. Trying to do this later - * is messy. We don't care to take a readlock to look at the ids - * in inode here, because we can't hold it across the trans_reserve. - * If the IDs do change before we take the ilock, we're covered - * because the i_*dquot fields will get updated anyway. - */ - if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) { - uint qflags = 0; - - if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) { - uid = iattr->ia_uid; - qflags |= XFS_QMOPT_UQUOTA; - } else { - uid = ip->i_d.di_uid; - } - if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { - gid = iattr->ia_gid; - qflags |= XFS_QMOPT_GQUOTA; - } else { - gid = ip->i_d.di_gid; - } - - /* - * We take a reference when we initialize udqp and gdqp, - * so it is important that we never blindly double trip on - * the same variable. See xfs_create() for an example. - */ - ASSERT(udqp == NULL); - ASSERT(gdqp == NULL); - error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), - qflags, &udqp, &gdqp); - if (error) - return error; - } - - tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); - error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); - if (error) - goto out_dqrele; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - - /* - * Change file ownership. Must be the owner or privileged. - */ - if (mask & (ATTR_UID|ATTR_GID)) { - /* - * These IDs could have changed since we last looked at them. - * But, we're assured that if the ownership did change - * while we didn't have the inode locked, inode's dquot(s) - * would have changed also. - */ - iuid = ip->i_d.di_uid; - igid = ip->i_d.di_gid; - gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; - uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; - - /* - * Do a quota reservation only if uid/gid is actually - * going to change. - */ - if (XFS_IS_QUOTA_RUNNING(mp) && - ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || - (XFS_IS_GQUOTA_ON(mp) && igid != gid))) { - ASSERT(tp); - error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, - capable(CAP_FOWNER) ? - XFS_QMOPT_FORCE_RES : 0); - if (error) /* out of quota */ - goto out_trans_cancel; - } - } - - xfs_trans_ijoin(tp, ip); - - /* - * Change file ownership. Must be the owner or privileged. - */ - if (mask & (ATTR_UID|ATTR_GID)) { - /* - * CAP_FSETID overrides the following restrictions: - * - * The set-user-ID and set-group-ID bits of a file will be - * cleared upon successful return from chown() - */ - if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && - !capable(CAP_FSETID)) - ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); - - /* - * Change the ownerships and register quota modifications - * in the transaction. - */ - if (iuid != uid) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) { - ASSERT(mask & ATTR_UID); - ASSERT(udqp); - olddquot1 = xfs_qm_vop_chown(tp, ip, - &ip->i_udquot, udqp); - } - ip->i_d.di_uid = uid; - inode->i_uid = uid; - } - if (igid != gid) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { - ASSERT(!XFS_IS_PQUOTA_ON(mp)); - ASSERT(mask & ATTR_GID); - ASSERT(gdqp); - olddquot2 = xfs_qm_vop_chown(tp, ip, - &ip->i_gdquot, gdqp); - } - ip->i_d.di_gid = gid; - inode->i_gid = gid; - } - } - - /* - * Change file access modes. - */ - if (mask & ATTR_MODE) { - umode_t mode = iattr->ia_mode; - - if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) - mode &= ~S_ISGID; - - ip->i_d.di_mode &= S_IFMT; - ip->i_d.di_mode |= mode & ~S_IFMT; - - inode->i_mode &= S_IFMT; - inode->i_mode |= mode & ~S_IFMT; - } - - /* - * Change file access or modified times. - */ - if (mask & ATTR_ATIME) { - inode->i_atime = iattr->ia_atime; - ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; - ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; - ip->i_update_core = 1; - } - if (mask & ATTR_CTIME) { - inode->i_ctime = iattr->ia_ctime; - ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; - ip->i_update_core = 1; - } - if (mask & ATTR_MTIME) { - inode->i_mtime = iattr->ia_mtime; - ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; - ip->i_update_core = 1; - } - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - XFS_STATS_INC(xs_ig_attrchg); - - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - /* - * Release any dquot(s) the inode had kept before chown. - */ - xfs_qm_dqrele(olddquot1); - xfs_qm_dqrele(olddquot2); - xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); - - if (error) - return XFS_ERROR(error); - - /* - * XXX(hch): Updating the ACL entries is not atomic vs the i_mode - * update. We could avoid this with linked transactions - * and passing down the transaction pointer all the way - * to attr_set. No previous user of the generic - * Posix ACL code seems to care about this issue either. - */ - if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { - error = -xfs_acl_chmod(inode); - if (error) - return XFS_ERROR(error); - } - - return 0; - -out_trans_cancel: - xfs_trans_cancel(tp, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); -out_dqrele: - xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); - return error; -} - -/* - * Truncate file. Must have write permission and not be a directory. - */ -int -xfs_setattr_size( - struct xfs_inode *ip, - struct iattr *iattr, - int flags) -{ - struct xfs_mount *mp = ip->i_mount; - struct inode *inode = VFS_I(ip); - int mask = iattr->ia_valid; - struct xfs_trans *tp; - int error; - uint lock_flags; - uint commit_flags = 0; - - trace_xfs_setattr(ip); - - if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); - - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); - - error = -inode_change_ok(inode, iattr); - if (error) - return XFS_ERROR(error); - - ASSERT(S_ISREG(ip->i_d.di_mode)); - ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| - ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID| - ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); - - lock_flags = XFS_ILOCK_EXCL; - if (!(flags & XFS_ATTR_NOLOCK)) - lock_flags |= XFS_IOLOCK_EXCL; - xfs_ilock(ip, lock_flags); - - /* - * Short circuit the truncate case for zero length files. - */ - if (iattr->ia_size == 0 && - ip->i_size == 0 && ip->i_d.di_nextents == 0) { - if (!(mask & (ATTR_CTIME|ATTR_MTIME))) - goto out_unlock; - - /* - * Use the regular setattr path to update the timestamps. - */ - xfs_iunlock(ip, lock_flags); - iattr->ia_valid &= ~ATTR_SIZE; - return xfs_setattr_nonsize(ip, iattr, 0); - } - - /* - * Make sure that the dquots are attached to the inode. - */ - error = xfs_qm_dqattach_locked(ip, 0); - if (error) - goto out_unlock; - - /* - * Now we can make the changes. Before we join the inode to the - * transaction, take care of the part of the truncation that must be - * done without the inode lock. This needs to be done before joining - * the inode to the transaction, because the inode cannot be unlocked - * once it is a part of the transaction. - */ - if (iattr->ia_size > ip->i_size) { - /* - * Do the first part of growing a file: zero any data in the - * last block that is beyond the old EOF. We need to do this - * before the inode is joined to the transaction to modify - * i_size. - */ - error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); - if (error) - goto out_unlock; - } - xfs_iunlock(ip, XFS_ILOCK_EXCL); - lock_flags &= ~XFS_ILOCK_EXCL; - - /* - * We are going to log the inode size change in this transaction so - * any previous writes that are beyond the on disk EOF and the new - * EOF that have not been written out need to be written here. If we - * do not write the data out, we expose ourselves to the null files - * problem. - * - * Only flush from the on disk size to the smaller of the in memory - * file size or the new size as that's the range we really care about - * here and prevents waiting for other data not within the range we - * care about here. - */ - if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) { - error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, - XBF_ASYNC, FI_NONE); - if (error) - goto out_unlock; - } - - /* - * Wait for all I/O to complete. - */ - xfs_ioend_wait(ip); - - error = -block_truncate_page(inode->i_mapping, iattr->ia_size, - xfs_get_blocks); - if (error) - goto out_unlock; - - tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); - error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, - XFS_ITRUNCATE_LOG_COUNT); - if (error) - goto out_trans_cancel; - - truncate_setsize(inode, iattr->ia_size); - - commit_flags = XFS_TRANS_RELEASE_LOG_RES; - lock_flags |= XFS_ILOCK_EXCL; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - - xfs_trans_ijoin(tp, ip); - - /* - * Only change the c/mtime if we are changing the size or we are - * explicitly asked to change it. This handles the semantic difference - * between truncate() and ftruncate() as implemented in the VFS. - * - * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a - * special case where we need to update the times despite not having - * these flags set. For all other operations the VFS set these flags - * explicitly if it wants a timestamp update. - */ - if (iattr->ia_size != ip->i_size && - (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { - iattr->ia_ctime = iattr->ia_mtime = - current_fs_time(inode->i_sb); - mask |= ATTR_CTIME | ATTR_MTIME; - } - - if (iattr->ia_size > ip->i_size) { - ip->i_d.di_size = iattr->ia_size; - ip->i_size = iattr->ia_size; - } else if (iattr->ia_size <= ip->i_size || - (iattr->ia_size == 0 && ip->i_d.di_nextents)) { - error = xfs_itruncate_data(&tp, ip, iattr->ia_size); - if (error) - goto out_trans_abort; - - /* - * Truncated "down", so we're removing references to old data - * here - if we delay flushing for a long time, we expose - * ourselves unduly to the notorious NULL files problem. So, - * we mark this inode and flush it when the file is closed, - * and do not wait the usual (long) time for writeout. - */ - xfs_iflags_set(ip, XFS_ITRUNCATED); - } - - if (mask & ATTR_CTIME) { - inode->i_ctime = iattr->ia_ctime; - ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; - ip->i_update_core = 1; - } - if (mask & ATTR_MTIME) { - inode->i_mtime = iattr->ia_mtime; - ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; - ip->i_update_core = 1; - } - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - XFS_STATS_INC(xs_ig_attrchg); - - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(tp); - - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); -out_unlock: - if (lock_flags) - xfs_iunlock(ip, lock_flags); - return error; - -out_trans_abort: - commit_flags |= XFS_TRANS_ABORT; -out_trans_cancel: - xfs_trans_cancel(tp, commit_flags); - goto out_unlock; -} - -STATIC int -xfs_vn_setattr( - struct dentry *dentry, - struct iattr *iattr) -{ - if (iattr->ia_valid & ATTR_SIZE) - return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0); - return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0); -} - -#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) - -/* - * Call fiemap helper to fill in user data. - * Returns positive errors to xfs_getbmap. - */ -STATIC int -xfs_fiemap_format( - void **arg, - struct getbmapx *bmv, - int *full) -{ - int error; - struct fiemap_extent_info *fieinfo = *arg; - u32 fiemap_flags = 0; - u64 logical, physical, length; - - /* Do nothing for a hole */ - if (bmv->bmv_block == -1LL) - return 0; - - logical = BBTOB(bmv->bmv_offset); - physical = BBTOB(bmv->bmv_block); - length = BBTOB(bmv->bmv_length); - - if (bmv->bmv_oflags & BMV_OF_PREALLOC) - fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN; - else if (bmv->bmv_oflags & BMV_OF_DELALLOC) { - fiemap_flags |= FIEMAP_EXTENT_DELALLOC; - physical = 0; /* no block yet */ - } - if (bmv->bmv_oflags & BMV_OF_LAST) - fiemap_flags |= FIEMAP_EXTENT_LAST; - - error = fiemap_fill_next_extent(fieinfo, logical, physical, - length, fiemap_flags); - if (error > 0) { - error = 0; - *full = 1; /* user array now full */ - } - - return -error; -} - -STATIC int -xfs_vn_fiemap( - struct inode *inode, - struct fiemap_extent_info *fieinfo, - u64 start, - u64 length) -{ - xfs_inode_t *ip = XFS_I(inode); - struct getbmapx bm; - int error; - - error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS); - if (error) - return error; - - /* Set up bmap header for xfs internal routine */ - bm.bmv_offset = BTOBB(start); - /* Special case for whole file */ - if (length == FIEMAP_MAX_OFFSET) - bm.bmv_length = -1LL; - else - bm.bmv_length = BTOBB(length); - - /* We add one because in getbmap world count includes the header */ - bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM : - fieinfo->fi_extents_max + 1; - bm.bmv_count = min_t(__s32, bm.bmv_count, - (PAGE_SIZE * 16 / sizeof(struct getbmapx))); - bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES; - if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) - bm.bmv_iflags |= BMV_IF_ATTRFORK; - if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC)) - bm.bmv_iflags |= BMV_IF_DELALLOC; - - error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo); - if (error) - return -error; - - return 0; -} - -static const struct inode_operations xfs_inode_operations = { - .get_acl = xfs_get_acl, - .getattr = xfs_vn_getattr, - .setattr = xfs_vn_setattr, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, - .listxattr = xfs_vn_listxattr, - .fiemap = xfs_vn_fiemap, -}; - -static const struct inode_operations xfs_dir_inode_operations = { - .create = xfs_vn_create, - .lookup = xfs_vn_lookup, - .link = xfs_vn_link, - .unlink = xfs_vn_unlink, - .symlink = xfs_vn_symlink, - .mkdir = xfs_vn_mkdir, - /* - * Yes, XFS uses the same method for rmdir and unlink. - * - * There are some subtile differences deeper in the code, - * but we use S_ISDIR to check for those. - */ - .rmdir = xfs_vn_unlink, - .mknod = xfs_vn_mknod, - .rename = xfs_vn_rename, - .get_acl = xfs_get_acl, - .getattr = xfs_vn_getattr, - .setattr = xfs_vn_setattr, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, - .listxattr = xfs_vn_listxattr, -}; - -static const struct inode_operations xfs_dir_ci_inode_operations = { - .create = xfs_vn_create, - .lookup = xfs_vn_ci_lookup, - .link = xfs_vn_link, - .unlink = xfs_vn_unlink, - .symlink = xfs_vn_symlink, - .mkdir = xfs_vn_mkdir, - /* - * Yes, XFS uses the same method for rmdir and unlink. - * - * There are some subtile differences deeper in the code, - * but we use S_ISDIR to check for those. - */ - .rmdir = xfs_vn_unlink, - .mknod = xfs_vn_mknod, - .rename = xfs_vn_rename, - .get_acl = xfs_get_acl, - .getattr = xfs_vn_getattr, - .setattr = xfs_vn_setattr, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, - .listxattr = xfs_vn_listxattr, -}; - -static const struct inode_operations xfs_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = xfs_vn_follow_link, - .put_link = xfs_vn_put_link, - .get_acl = xfs_get_acl, - .getattr = xfs_vn_getattr, - .setattr = xfs_vn_setattr, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, - .listxattr = xfs_vn_listxattr, -}; - -STATIC void -xfs_diflags_to_iflags( - struct inode *inode, - struct xfs_inode *ip) -{ - if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) - inode->i_flags |= S_IMMUTABLE; - else - inode->i_flags &= ~S_IMMUTABLE; - if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) - inode->i_flags |= S_APPEND; - else - inode->i_flags &= ~S_APPEND; - if (ip->i_d.di_flags & XFS_DIFLAG_SYNC) - inode->i_flags |= S_SYNC; - else - inode->i_flags &= ~S_SYNC; - if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME) - inode->i_flags |= S_NOATIME; - else - inode->i_flags &= ~S_NOATIME; -} - -/* - * Initialize the Linux inode, set up the operation vectors and - * unlock the inode. - * - * When reading existing inodes from disk this is called directly - * from xfs_iget, when creating a new inode it is called from - * xfs_ialloc after setting up the inode. - * - * We are always called with an uninitialised linux inode here. - * We need to initialise the necessary fields and take a reference - * on it. - */ -void -xfs_setup_inode( - struct xfs_inode *ip) -{ - struct inode *inode = &ip->i_vnode; - - inode->i_ino = ip->i_ino; - inode->i_state = I_NEW; - - inode_sb_list_add(inode); - /* make the inode look hashed for the writeback code */ - hlist_add_fake(&inode->i_hash); - - inode->i_mode = ip->i_d.di_mode; - inode->i_nlink = ip->i_d.di_nlink; - inode->i_uid = ip->i_d.di_uid; - inode->i_gid = ip->i_d.di_gid; - - switch (inode->i_mode & S_IFMT) { - case S_IFBLK: - case S_IFCHR: - inode->i_rdev = - MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, - sysv_minor(ip->i_df.if_u2.if_rdev)); - break; - default: - inode->i_rdev = 0; - break; - } - - inode->i_generation = ip->i_d.di_gen; - i_size_write(inode, ip->i_d.di_size); - inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec; - inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec; - inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; - inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; - inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; - inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; - xfs_diflags_to_iflags(inode, ip); - - switch (inode->i_mode & S_IFMT) { - case S_IFREG: - inode->i_op = &xfs_inode_operations; - inode->i_fop = &xfs_file_operations; - inode->i_mapping->a_ops = &xfs_address_space_operations; - break; - case S_IFDIR: - if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) - inode->i_op = &xfs_dir_ci_inode_operations; - else - inode->i_op = &xfs_dir_inode_operations; - inode->i_fop = &xfs_dir_file_operations; - break; - case S_IFLNK: - inode->i_op = &xfs_symlink_inode_operations; - if (!(ip->i_df.if_flags & XFS_IFINLINE)) - inode->i_mapping->a_ops = &xfs_address_space_operations; - break; - default: - inode->i_op = &xfs_inode_operations; - init_special_inode(inode, inode->i_mode, inode->i_rdev); - break; - } - - /* - * If there is no attribute fork no ACL can exist on this inode, - * and it can't have any file capabilities attached to it either. - */ - if (!XFS_IFORK_Q(ip)) { - inode_has_no_xattr(inode); - cache_no_acl(inode); - } - - xfs_iflags_clear(ip, XFS_INEW); - barrier(); - - unlock_new_inode(inode); -} diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h deleted file mode 100644 index ef41c92..0000000 --- a/fs/xfs/linux-2.6/xfs_iops.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_IOPS_H__ -#define __XFS_IOPS_H__ - -struct xfs_inode; - -extern const struct file_operations xfs_file_operations; -extern const struct file_operations xfs_dir_file_operations; - -extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); - -extern void xfs_setup_inode(struct xfs_inode *); - -#endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h deleted file mode 100644 index 1e8a45e..0000000 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ /dev/null @@ -1,309 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_LINUX__ -#define __XFS_LINUX__ - -#include - -/* - * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. - * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set. - */ -#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64) -# define XFS_BIG_BLKNOS 1 -# define XFS_BIG_INUMS 1 -#else -# define XFS_BIG_BLKNOS 0 -# define XFS_BIG_INUMS 0 -#endif - -#include "xfs_types.h" - -#include "kmem.h" -#include "mrlock.h" -#include "time.h" -#include "uuid.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "xfs_vnode.h" -#include "xfs_stats.h" -#include "xfs_sysctl.h" -#include "xfs_iops.h" -#include "xfs_aops.h" -#include "xfs_super.h" -#include "xfs_buf.h" -#include "xfs_message.h" - -#ifdef __BIG_ENDIAN -#define XFS_NATIVE_HOST 1 -#else -#undef XFS_NATIVE_HOST -#endif - -/* - * Feature macros (disable/enable) - */ -#ifdef CONFIG_SMP -#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ -#else -#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ -#endif - -#define irix_sgid_inherit xfs_params.sgid_inherit.val -#define irix_symlink_mode xfs_params.symlink_mode.val -#define xfs_panic_mask xfs_params.panic_mask.val -#define xfs_error_level xfs_params.error_level.val -#define xfs_syncd_centisecs xfs_params.syncd_timer.val -#define xfs_stats_clear xfs_params.stats_clear.val -#define xfs_inherit_sync xfs_params.inherit_sync.val -#define xfs_inherit_nodump xfs_params.inherit_nodump.val -#define xfs_inherit_noatime xfs_params.inherit_noatim.val -#define xfs_buf_timer_centisecs xfs_params.xfs_buf_timer.val -#define xfs_buf_age_centisecs xfs_params.xfs_buf_age.val -#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val -#define xfs_rotorstep xfs_params.rotorstep.val -#define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val -#define xfs_fstrm_centisecs xfs_params.fstrm_timer.val - -#define current_cpu() (raw_smp_processor_id()) -#define current_pid() (current->pid) -#define current_test_flags(f) (current->flags & (f)) -#define current_set_flags_nested(sp, f) \ - (*(sp) = current->flags, current->flags |= (f)) -#define current_clear_flags_nested(sp, f) \ - (*(sp) = current->flags, current->flags &= ~(f)) -#define current_restore_flags_nested(sp, f) \ - (current->flags = ((current->flags & ~(f)) | (*(sp) & (f)))) - -#define spinlock_destroy(lock) - -#define NBBY 8 /* number of bits per byte */ - -/* - * Size of block device i/o is parameterized here. - * Currently the system supports page-sized i/o. - */ -#define BLKDEV_IOSHIFT PAGE_CACHE_SHIFT -#define BLKDEV_IOSIZE (1<> 32; - __low = c; - if (__high) { - __upper = __high % (b); - __high = __high / (b); - } - asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); - asm("":"=A" (c):"a" (__low),"d" (__high)); - *(__u64 *)a = c; - return __mod; - } - } - - /* NOTREACHED */ - return 0; -} - -/* Side effect free 64 bit mod operation */ -static inline __u32 xfs_do_mod(void *a, __u32 b, int n) -{ - switch (n) { - case 4: - return *(__u32 *)a % b; - case 8: - { - unsigned long __upper, __low, __high, __mod; - __u64 c = *(__u64 *)a; - __upper = __high = c >> 32; - __low = c; - if (__high) { - __upper = __high % (b); - __high = __high / (b); - } - asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); - asm("":"=A" (c):"a" (__low),"d" (__high)); - return __mod; - } - } - - /* NOTREACHED */ - return 0; -} -#else -static inline __u32 xfs_do_div(void *a, __u32 b, int n) -{ - __u32 mod; - - switch (n) { - case 4: - mod = *(__u32 *)a % b; - *(__u32 *)a = *(__u32 *)a / b; - return mod; - case 8: - mod = do_div(*(__u64 *)a, b); - return mod; - } - - /* NOTREACHED */ - return 0; -} - -/* Side effect free 64 bit mod operation */ -static inline __u32 xfs_do_mod(void *a, __u32 b, int n) -{ - switch (n) { - case 4: - return *(__u32 *)a % b; - case 8: - { - __u64 c = *(__u64 *)a; - return do_div(c, b); - } - } - - /* NOTREACHED */ - return 0; -} -#endif - -#undef do_div -#define do_div(a, b) xfs_do_div(&(a), (b), sizeof(a)) -#define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a)) - -static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y) -{ - x += y - 1; - do_div(x, y); - return(x * y); -} - -static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) -{ - x += y - 1; - do_div(x, y); - return x; -} - -/* ARM old ABI has some weird alignment/padding */ -#if defined(__arm__) && !defined(__ARM_EABI__) -#define __arch_pack __attribute__((packed)) -#else -#define __arch_pack -#endif - -#define ASSERT_ALWAYS(expr) \ - (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) - -#ifndef DEBUG -#define ASSERT(expr) ((void)0) - -#ifndef STATIC -# define STATIC static noinline -#endif - -#else /* DEBUG */ - -#define ASSERT(expr) \ - (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) - -#ifndef STATIC -# define STATIC noinline -#endif - -#endif /* DEBUG */ - -#endif /* __XFS_LINUX__ */ diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c deleted file mode 100644 index bd672de..0000000 --- a/fs/xfs/linux-2.6/xfs_message.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2011 Red Hat, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" - -/* - * XFS logging functions - */ -static void -__xfs_printk( - const char *level, - const struct xfs_mount *mp, - struct va_format *vaf) -{ - if (mp && mp->m_fsname) { - printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf); - return; - } - printk("%sXFS: %pV\n", level, vaf); -} - -#define define_xfs_printk_level(func, kern_level) \ -void func(const struct xfs_mount *mp, const char *fmt, ...) \ -{ \ - struct va_format vaf; \ - va_list args; \ - \ - va_start(args, fmt); \ - \ - vaf.fmt = fmt; \ - vaf.va = &args; \ - \ - __xfs_printk(kern_level, mp, &vaf); \ - va_end(args); \ -} \ - -define_xfs_printk_level(xfs_emerg, KERN_EMERG); -define_xfs_printk_level(xfs_alert, KERN_ALERT); -define_xfs_printk_level(xfs_crit, KERN_CRIT); -define_xfs_printk_level(xfs_err, KERN_ERR); -define_xfs_printk_level(xfs_warn, KERN_WARNING); -define_xfs_printk_level(xfs_notice, KERN_NOTICE); -define_xfs_printk_level(xfs_info, KERN_INFO); -#ifdef DEBUG -define_xfs_printk_level(xfs_debug, KERN_DEBUG); -#endif - -void -xfs_alert_tag( - const struct xfs_mount *mp, - int panic_tag, - const char *fmt, ...) -{ - struct va_format vaf; - va_list args; - int do_panic = 0; - - if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { - xfs_alert(mp, "Transforming an alert into a BUG."); - do_panic = 1; - } - - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - - __xfs_printk(KERN_ALERT, mp, &vaf); - va_end(args); - - BUG_ON(do_panic); -} - -void -assfail(char *expr, char *file, int line) -{ - xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d", - expr, file, line); - BUG(); -} - -void -xfs_hex_dump(void *p, int length) -{ - print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1); -} diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h deleted file mode 100644 index 7fb7ea0..0000000 --- a/fs/xfs/linux-2.6/xfs_message.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef __XFS_MESSAGE_H -#define __XFS_MESSAGE_H 1 - -struct xfs_mount; - -extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void xfs_alert_tag(const struct xfs_mount *mp, int tag, - const char *fmt, ...) - __attribute__ ((format (printf, 3, 4))); -extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); - -#ifdef DEBUG -extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -#else -static inline void -__attribute__ ((format (printf, 2, 3))) -xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) -{ -} -#endif - -extern void assfail(char *expr, char *f, int l); - -extern void xfs_hex_dump(void *p, int length); - -#endif /* __XFS_MESSAGE_H */ diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c deleted file mode 100644 index 7e76f53..0000000 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2008, Christoph Hellwig - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_sb.h" -#include "xfs_inum.h" -#include "xfs_log.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_quota.h" -#include "xfs_trans.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_qm.h" -#include - - -STATIC int -xfs_quota_type(int type) -{ - switch (type) { - case USRQUOTA: - return XFS_DQ_USER; - case GRPQUOTA: - return XFS_DQ_GROUP; - default: - return XFS_DQ_PROJ; - } -} - -STATIC int -xfs_fs_get_xstate( - struct super_block *sb, - struct fs_quota_stat *fqs) -{ - struct xfs_mount *mp = XFS_M(sb); - - if (!XFS_IS_QUOTA_RUNNING(mp)) - return -ENOSYS; - return -xfs_qm_scall_getqstat(mp, fqs); -} - -STATIC int -xfs_fs_set_xstate( - struct super_block *sb, - unsigned int uflags, - int op) -{ - struct xfs_mount *mp = XFS_M(sb); - unsigned int flags = 0; - - if (sb->s_flags & MS_RDONLY) - return -EROFS; - if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp)) - return -ENOSYS; - - if (uflags & FS_QUOTA_UDQ_ACCT) - flags |= XFS_UQUOTA_ACCT; - if (uflags & FS_QUOTA_PDQ_ACCT) - flags |= XFS_PQUOTA_ACCT; - if (uflags & FS_QUOTA_GDQ_ACCT) - flags |= XFS_GQUOTA_ACCT; - if (uflags & FS_QUOTA_UDQ_ENFD) - flags |= XFS_UQUOTA_ENFD; - if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD)) - flags |= XFS_OQUOTA_ENFD; - - switch (op) { - case Q_XQUOTAON: - return -xfs_qm_scall_quotaon(mp, flags); - case Q_XQUOTAOFF: - if (!XFS_IS_QUOTA_ON(mp)) - return -EINVAL; - return -xfs_qm_scall_quotaoff(mp, flags); - case Q_XQUOTARM: - if (XFS_IS_QUOTA_ON(mp)) - return -EINVAL; - return -xfs_qm_scall_trunc_qfiles(mp, flags); - } - - return -EINVAL; -} - -STATIC int -xfs_fs_get_dqblk( - struct super_block *sb, - int type, - qid_t id, - struct fs_disk_quota *fdq) -{ - struct xfs_mount *mp = XFS_M(sb); - - if (!XFS_IS_QUOTA_RUNNING(mp)) - return -ENOSYS; - if (!XFS_IS_QUOTA_ON(mp)) - return -ESRCH; - - return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq); -} - -STATIC int -xfs_fs_set_dqblk( - struct super_block *sb, - int type, - qid_t id, - struct fs_disk_quota *fdq) -{ - struct xfs_mount *mp = XFS_M(sb); - - if (sb->s_flags & MS_RDONLY) - return -EROFS; - if (!XFS_IS_QUOTA_RUNNING(mp)) - return -ENOSYS; - if (!XFS_IS_QUOTA_ON(mp)) - return -ESRCH; - - return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq); -} - -const struct quotactl_ops xfs_quotactl_operations = { - .get_xstate = xfs_fs_get_xstate, - .set_xstate = xfs_fs_set_xstate, - .get_dqblk = xfs_fs_get_dqblk, - .set_dqblk = xfs_fs_set_dqblk, -}; diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c deleted file mode 100644 index 76fdc58..0000000 --- a/fs/xfs/linux-2.6/xfs_stats.c +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include - -DEFINE_PER_CPU(struct xfsstats, xfsstats); - -static int xfs_stat_proc_show(struct seq_file *m, void *v) -{ - int c, i, j, val; - __uint64_t xs_xstrat_bytes = 0; - __uint64_t xs_write_bytes = 0; - __uint64_t xs_read_bytes = 0; - - static const struct xstats_entry { - char *desc; - int endpoint; - } xstats[] = { - { "extent_alloc", XFSSTAT_END_EXTENT_ALLOC }, - { "abt", XFSSTAT_END_ALLOC_BTREE }, - { "blk_map", XFSSTAT_END_BLOCK_MAPPING }, - { "bmbt", XFSSTAT_END_BLOCK_MAP_BTREE }, - { "dir", XFSSTAT_END_DIRECTORY_OPS }, - { "trans", XFSSTAT_END_TRANSACTIONS }, - { "ig", XFSSTAT_END_INODE_OPS }, - { "log", XFSSTAT_END_LOG_OPS }, - { "push_ail", XFSSTAT_END_TAIL_PUSHING }, - { "xstrat", XFSSTAT_END_WRITE_CONVERT }, - { "rw", XFSSTAT_END_READ_WRITE_OPS }, - { "attr", XFSSTAT_END_ATTRIBUTE_OPS }, - { "icluster", XFSSTAT_END_INODE_CLUSTER }, - { "vnodes", XFSSTAT_END_VNODE_OPS }, - { "buf", XFSSTAT_END_BUF }, - { "abtb2", XFSSTAT_END_ABTB_V2 }, - { "abtc2", XFSSTAT_END_ABTC_V2 }, - { "bmbt2", XFSSTAT_END_BMBT_V2 }, - { "ibt2", XFSSTAT_END_IBT_V2 }, - }; - - /* Loop over all stats groups */ - for (i=j = 0; i < ARRAY_SIZE(xstats); i++) { - seq_printf(m, "%s", xstats[i].desc); - /* inner loop does each group */ - while (j < xstats[i].endpoint) { - val = 0; - /* sum over all cpus */ - for_each_possible_cpu(c) - val += *(((__u32*)&per_cpu(xfsstats, c) + j)); - seq_printf(m, " %u", val); - j++; - } - seq_putc(m, '\n'); - } - /* extra precision counters */ - for_each_possible_cpu(i) { - xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes; - xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes; - xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; - } - - seq_printf(m, "xpc %Lu %Lu %Lu\n", - xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); - seq_printf(m, "debug %u\n", -#if defined(DEBUG) - 1); -#else - 0); -#endif - return 0; -} - -static int xfs_stat_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, xfs_stat_proc_show, NULL); -} - -static const struct file_operations xfs_stat_proc_fops = { - .owner = THIS_MODULE, - .open = xfs_stat_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -int -xfs_init_procfs(void) -{ - if (!proc_mkdir("fs/xfs", NULL)) - goto out; - - if (!proc_create("fs/xfs/stat", 0, NULL, - &xfs_stat_proc_fops)) - goto out_remove_entry; - return 0; - - out_remove_entry: - remove_proc_entry("fs/xfs", NULL); - out: - return -ENOMEM; -} - -void -xfs_cleanup_procfs(void) -{ - remove_proc_entry("fs/xfs/stat", NULL); - remove_proc_entry("fs/xfs", NULL); -} diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h deleted file mode 100644 index 736854b..0000000 --- a/fs/xfs/linux-2.6/xfs_stats.h +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (c) 2000,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_STATS_H__ -#define __XFS_STATS_H__ - - -#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF) - -#include - -/* - * XFS global statistics - */ -struct xfsstats { -# define XFSSTAT_END_EXTENT_ALLOC 4 - __uint32_t xs_allocx; - __uint32_t xs_allocb; - __uint32_t xs_freex; - __uint32_t xs_freeb; -# define XFSSTAT_END_ALLOC_BTREE (XFSSTAT_END_EXTENT_ALLOC+4) - __uint32_t xs_abt_lookup; - __uint32_t xs_abt_compare; - __uint32_t xs_abt_insrec; - __uint32_t xs_abt_delrec; -# define XFSSTAT_END_BLOCK_MAPPING (XFSSTAT_END_ALLOC_BTREE+7) - __uint32_t xs_blk_mapr; - __uint32_t xs_blk_mapw; - __uint32_t xs_blk_unmap; - __uint32_t xs_add_exlist; - __uint32_t xs_del_exlist; - __uint32_t xs_look_exlist; - __uint32_t xs_cmp_exlist; -# define XFSSTAT_END_BLOCK_MAP_BTREE (XFSSTAT_END_BLOCK_MAPPING+4) - __uint32_t xs_bmbt_lookup; - __uint32_t xs_bmbt_compare; - __uint32_t xs_bmbt_insrec; - __uint32_t xs_bmbt_delrec; -# define XFSSTAT_END_DIRECTORY_OPS (XFSSTAT_END_BLOCK_MAP_BTREE+4) - __uint32_t xs_dir_lookup; - __uint32_t xs_dir_create; - __uint32_t xs_dir_remove; - __uint32_t xs_dir_getdents; -# define XFSSTAT_END_TRANSACTIONS (XFSSTAT_END_DIRECTORY_OPS+3) - __uint32_t xs_trans_sync; - __uint32_t xs_trans_async; - __uint32_t xs_trans_empty; -# define XFSSTAT_END_INODE_OPS (XFSSTAT_END_TRANSACTIONS+7) - __uint32_t xs_ig_attempts; - __uint32_t xs_ig_found; - __uint32_t xs_ig_frecycle; - __uint32_t xs_ig_missed; - __uint32_t xs_ig_dup; - __uint32_t xs_ig_reclaims; - __uint32_t xs_ig_attrchg; -# define XFSSTAT_END_LOG_OPS (XFSSTAT_END_INODE_OPS+5) - __uint32_t xs_log_writes; - __uint32_t xs_log_blocks; - __uint32_t xs_log_noiclogs; - __uint32_t xs_log_force; - __uint32_t xs_log_force_sleep; -# define XFSSTAT_END_TAIL_PUSHING (XFSSTAT_END_LOG_OPS+10) - __uint32_t xs_try_logspace; - __uint32_t xs_sleep_logspace; - __uint32_t xs_push_ail; - __uint32_t xs_push_ail_success; - __uint32_t xs_push_ail_pushbuf; - __uint32_t xs_push_ail_pinned; - __uint32_t xs_push_ail_locked; - __uint32_t xs_push_ail_flushing; - __uint32_t xs_push_ail_restarts; - __uint32_t xs_push_ail_flush; -# define XFSSTAT_END_WRITE_CONVERT (XFSSTAT_END_TAIL_PUSHING+2) - __uint32_t xs_xstrat_quick; - __uint32_t xs_xstrat_split; -# define XFSSTAT_END_READ_WRITE_OPS (XFSSTAT_END_WRITE_CONVERT+2) - __uint32_t xs_write_calls; - __uint32_t xs_read_calls; -# define XFSSTAT_END_ATTRIBUTE_OPS (XFSSTAT_END_READ_WRITE_OPS+4) - __uint32_t xs_attr_get; - __uint32_t xs_attr_set; - __uint32_t xs_attr_remove; - __uint32_t xs_attr_list; -# define XFSSTAT_END_INODE_CLUSTER (XFSSTAT_END_ATTRIBUTE_OPS+3) - __uint32_t xs_iflush_count; - __uint32_t xs_icluster_flushcnt; - __uint32_t xs_icluster_flushinode; -# define XFSSTAT_END_VNODE_OPS (XFSSTAT_END_INODE_CLUSTER+8) - __uint32_t vn_active; /* # vnodes not on free lists */ - __uint32_t vn_alloc; /* # times vn_alloc called */ - __uint32_t vn_get; /* # times vn_get called */ - __uint32_t vn_hold; /* # times vn_hold called */ - __uint32_t vn_rele; /* # times vn_rele called */ - __uint32_t vn_reclaim; /* # times vn_reclaim called */ - __uint32_t vn_remove; /* # times vn_remove called */ - __uint32_t vn_free; /* # times vn_free called */ -#define XFSSTAT_END_BUF (XFSSTAT_END_VNODE_OPS+9) - __uint32_t xb_get; - __uint32_t xb_create; - __uint32_t xb_get_locked; - __uint32_t xb_get_locked_waited; - __uint32_t xb_busy_locked; - __uint32_t xb_miss_locked; - __uint32_t xb_page_retries; - __uint32_t xb_page_found; - __uint32_t xb_get_read; -/* Version 2 btree counters */ -#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF+15) - __uint32_t xs_abtb_2_lookup; - __uint32_t xs_abtb_2_compare; - __uint32_t xs_abtb_2_insrec; - __uint32_t xs_abtb_2_delrec; - __uint32_t xs_abtb_2_newroot; - __uint32_t xs_abtb_2_killroot; - __uint32_t xs_abtb_2_increment; - __uint32_t xs_abtb_2_decrement; - __uint32_t xs_abtb_2_lshift; - __uint32_t xs_abtb_2_rshift; - __uint32_t xs_abtb_2_split; - __uint32_t xs_abtb_2_join; - __uint32_t xs_abtb_2_alloc; - __uint32_t xs_abtb_2_free; - __uint32_t xs_abtb_2_moves; -#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2+15) - __uint32_t xs_abtc_2_lookup; - __uint32_t xs_abtc_2_compare; - __uint32_t xs_abtc_2_insrec; - __uint32_t xs_abtc_2_delrec; - __uint32_t xs_abtc_2_newroot; - __uint32_t xs_abtc_2_killroot; - __uint32_t xs_abtc_2_increment; - __uint32_t xs_abtc_2_decrement; - __uint32_t xs_abtc_2_lshift; - __uint32_t xs_abtc_2_rshift; - __uint32_t xs_abtc_2_split; - __uint32_t xs_abtc_2_join; - __uint32_t xs_abtc_2_alloc; - __uint32_t xs_abtc_2_free; - __uint32_t xs_abtc_2_moves; -#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2+15) - __uint32_t xs_bmbt_2_lookup; - __uint32_t xs_bmbt_2_compare; - __uint32_t xs_bmbt_2_insrec; - __uint32_t xs_bmbt_2_delrec; - __uint32_t xs_bmbt_2_newroot; - __uint32_t xs_bmbt_2_killroot; - __uint32_t xs_bmbt_2_increment; - __uint32_t xs_bmbt_2_decrement; - __uint32_t xs_bmbt_2_lshift; - __uint32_t xs_bmbt_2_rshift; - __uint32_t xs_bmbt_2_split; - __uint32_t xs_bmbt_2_join; - __uint32_t xs_bmbt_2_alloc; - __uint32_t xs_bmbt_2_free; - __uint32_t xs_bmbt_2_moves; -#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2+15) - __uint32_t xs_ibt_2_lookup; - __uint32_t xs_ibt_2_compare; - __uint32_t xs_ibt_2_insrec; - __uint32_t xs_ibt_2_delrec; - __uint32_t xs_ibt_2_newroot; - __uint32_t xs_ibt_2_killroot; - __uint32_t xs_ibt_2_increment; - __uint32_t xs_ibt_2_decrement; - __uint32_t xs_ibt_2_lshift; - __uint32_t xs_ibt_2_rshift; - __uint32_t xs_ibt_2_split; - __uint32_t xs_ibt_2_join; - __uint32_t xs_ibt_2_alloc; - __uint32_t xs_ibt_2_free; - __uint32_t xs_ibt_2_moves; -/* Extra precision counters */ - __uint64_t xs_xstrat_bytes; - __uint64_t xs_write_bytes; - __uint64_t xs_read_bytes; -}; - -DECLARE_PER_CPU(struct xfsstats, xfsstats); - -/* - * We don't disable preempt, not too worried about poking the - * wrong CPU's stat for now (also aggregated before reporting). - */ -#define XFS_STATS_INC(v) (per_cpu(xfsstats, current_cpu()).v++) -#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--) -#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc)) - -extern int xfs_init_procfs(void); -extern void xfs_cleanup_procfs(void); - - -#else /* !CONFIG_PROC_FS */ - -# define XFS_STATS_INC(count) -# define XFS_STATS_DEC(count) -# define XFS_STATS_ADD(count, inc) - -static inline int xfs_init_procfs(void) -{ - return 0; -} - -static inline void xfs_cleanup_procfs(void) -{ -} - -#endif /* !CONFIG_PROC_FS */ - -#endif /* __XFS_STATS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c deleted file mode 100644 index 9a72dda..0000000 --- a/fs/xfs/linux-2.6/xfs_super.c +++ /dev/null @@ -1,1773 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "xfs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_itable.h" -#include "xfs_fsops.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_utils.h" -#include "xfs_vnodeops.h" -#include "xfs_log_priv.h" -#include "xfs_trans_priv.h" -#include "xfs_filestream.h" -#include "xfs_da_btree.h" -#include "xfs_extfree_item.h" -#include "xfs_mru_cache.h" -#include "xfs_inode_item.h" -#include "xfs_sync.h" -#include "xfs_trace.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static const struct super_operations xfs_super_operations; -static kmem_zone_t *xfs_ioend_zone; -mempool_t *xfs_ioend_pool; - -#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ -#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ -#define MNTOPT_LOGDEV "logdev" /* log device */ -#define MNTOPT_RTDEV "rtdev" /* realtime I/O device */ -#define MNTOPT_BIOSIZE "biosize" /* log2 of preferred buffered io size */ -#define MNTOPT_WSYNC "wsync" /* safe-mode nfs compatible mount */ -#define MNTOPT_NOALIGN "noalign" /* turn off stripe alignment */ -#define MNTOPT_SWALLOC "swalloc" /* turn on stripe width allocation */ -#define MNTOPT_SUNIT "sunit" /* data volume stripe unit */ -#define MNTOPT_SWIDTH "swidth" /* data volume stripe width */ -#define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */ -#define MNTOPT_MTPT "mtpt" /* filesystem mount point */ -#define MNTOPT_GRPID "grpid" /* group-ID from parent directory */ -#define MNTOPT_NOGRPID "nogrpid" /* group-ID from current process */ -#define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */ -#define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */ -#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ -#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ -#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and - * unwritten extent conversion */ -#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */ -#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ -#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ -#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ -#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */ -#define MNTOPT_NOLARGEIO "nolargeio" /* do not report large I/O sizes - * in stat(). */ -#define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */ -#define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */ -#define MNTOPT_FILESTREAM "filestreams" /* use filestreams allocator */ -#define MNTOPT_QUOTA "quota" /* disk quotas (user) */ -#define MNTOPT_NOQUOTA "noquota" /* no quotas */ -#define MNTOPT_USRQUOTA "usrquota" /* user quota enabled */ -#define MNTOPT_GRPQUOTA "grpquota" /* group quota enabled */ -#define MNTOPT_PRJQUOTA "prjquota" /* project quota enabled */ -#define MNTOPT_UQUOTA "uquota" /* user quota (IRIX variant) */ -#define MNTOPT_GQUOTA "gquota" /* group quota (IRIX variant) */ -#define MNTOPT_PQUOTA "pquota" /* project quota (IRIX variant) */ -#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */ -#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ -#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ -#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ -#define MNTOPT_DELAYLOG "delaylog" /* Delayed logging enabled */ -#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */ -#define MNTOPT_DISCARD "discard" /* Discard unused blocks */ -#define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */ - -/* - * Table driven mount option parser. - * - * Currently only used for remount, but it will be used for mount - * in the future, too. - */ -enum { - Opt_barrier, Opt_nobarrier, Opt_err -}; - -static const match_table_t tokens = { - {Opt_barrier, "barrier"}, - {Opt_nobarrier, "nobarrier"}, - {Opt_err, NULL} -}; - - -STATIC unsigned long -suffix_strtoul(char *s, char **endp, unsigned int base) -{ - int last, shift_left_factor = 0; - char *value = s; - - last = strlen(value) - 1; - if (value[last] == 'K' || value[last] == 'k') { - shift_left_factor = 10; - value[last] = '\0'; - } - if (value[last] == 'M' || value[last] == 'm') { - shift_left_factor = 20; - value[last] = '\0'; - } - if (value[last] == 'G' || value[last] == 'g') { - shift_left_factor = 30; - value[last] = '\0'; - } - - return simple_strtoul((const char *)s, endp, base) << shift_left_factor; -} - -/* - * This function fills in xfs_mount_t fields based on mount args. - * Note: the superblock has _not_ yet been read in. - * - * Note that this function leaks the various device name allocations on - * failure. The caller takes care of them. - */ -STATIC int -xfs_parseargs( - struct xfs_mount *mp, - char *options) -{ - struct super_block *sb = mp->m_super; - char *this_char, *value, *eov; - int dsunit = 0; - int dswidth = 0; - int iosize = 0; - __uint8_t iosizelog = 0; - - /* - * set up the mount name first so all the errors will refer to the - * correct device. - */ - mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); - if (!mp->m_fsname) - return ENOMEM; - mp->m_fsname_len = strlen(mp->m_fsname) + 1; - - /* - * Copy binary VFS mount flags we are interested in. - */ - if (sb->s_flags & MS_RDONLY) - mp->m_flags |= XFS_MOUNT_RDONLY; - if (sb->s_flags & MS_DIRSYNC) - mp->m_flags |= XFS_MOUNT_DIRSYNC; - if (sb->s_flags & MS_SYNCHRONOUS) - mp->m_flags |= XFS_MOUNT_WSYNC; - - /* - * Set some default flags that could be cleared by the mount option - * parsing. - */ - mp->m_flags |= XFS_MOUNT_BARRIER; - mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; - mp->m_flags |= XFS_MOUNT_SMALL_INUMS; - mp->m_flags |= XFS_MOUNT_DELAYLOG; - - /* - * These can be overridden by the mount option parsing. - */ - mp->m_logbufs = -1; - mp->m_logbsize = -1; - - if (!options) - goto done; - - while ((this_char = strsep(&options, ",")) != NULL) { - if (!*this_char) - continue; - if ((value = strchr(this_char, '=')) != NULL) - *value++ = 0; - - if (!strcmp(this_char, MNTOPT_LOGBUFS)) { - if (!value || !*value) { - xfs_warn(mp, "%s option requires an argument", - this_char); - return EINVAL; - } - mp->m_logbufs = simple_strtoul(value, &eov, 10); - } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { - if (!value || !*value) { - xfs_warn(mp, "%s option requires an argument", - this_char); - return EINVAL; - } - mp->m_logbsize = suffix_strtoul(value, &eov, 10); - } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { - if (!value || !*value) { - xfs_warn(mp, "%s option requires an argument", - this_char); - return EINVAL; - } - mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); - if (!mp->m_logname) - return ENOMEM; - } else if (!strcmp(this_char, MNTOPT_MTPT)) { - xfs_warn(mp, "%s option not allowed on this system", - this_char); - return EINVAL; - } else if (!strcmp(this_char, MNTOPT_RTDEV)) { - if (!value || !*value) { - xfs_warn(mp, "%s option requires an argument", - this_char); - return EINVAL; - } - mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); - if (!mp->m_rtname) - return ENOMEM; - } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { - if (!value || !*value) { - xfs_warn(mp, "%s option requires an argument", - this_char); - return EINVAL; - } - iosize = simple_strtoul(value, &eov, 10); - iosizelog = ffs(iosize) - 1; - } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { - if (!value || !*value) { - xfs_warn(mp, "%s option requires an argument", - this_char); - return EINVAL; - } - iosize = suffix_strtoul(value, &eov, 10); - iosizelog = ffs(iosize) - 1; - } else if (!strcmp(this_char, MNTOPT_GRPID) || - !strcmp(this_char, MNTOPT_BSDGROUPS)) { - mp->m_flags |= XFS_MOUNT_GRPID; - } else if (!strcmp(this_char, MNTOPT_NOGRPID) || - !strcmp(this_char, MNTOPT_SYSVGROUPS)) { - mp->m_flags &= ~XFS_MOUNT_GRPID; - } else if (!strcmp(this_char, MNTOPT_WSYNC)) { - mp->m_flags |= XFS_MOUNT_WSYNC; - } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { - mp->m_flags |= XFS_MOUNT_NORECOVERY; - } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { - mp->m_flags |= XFS_MOUNT_NOALIGN; - } else if (!strcmp(this_char, MNTOPT_SWALLOC)) { - mp->m_flags |= XFS_MOUNT_SWALLOC; - } else if (!strcmp(this_char, MNTOPT_SUNIT)) { - if (!value || !*value) { - xfs_warn(mp, "%s option requires an argument", - this_char); - return EINVAL; - } - dsunit = simple_strtoul(value, &eov, 10); - } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { - if (!value || !*value) { - xfs_warn(mp, "%s option requires an argument", - this_char); - return EINVAL; - } - dswidth = simple_strtoul(value, &eov, 10); - } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { - mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; -#if !XFS_BIG_INUMS - xfs_warn(mp, "%s option not allowed on this system", - this_char); - return EINVAL; -#endif - } else if (!strcmp(this_char, MNTOPT_NOUUID)) { - mp->m_flags |= XFS_MOUNT_NOUUID; - } else if (!strcmp(this_char, MNTOPT_BARRIER)) { - mp->m_flags |= XFS_MOUNT_BARRIER; - } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) { - mp->m_flags &= ~XFS_MOUNT_BARRIER; - } else if (!strcmp(this_char, MNTOPT_IKEEP)) { - mp->m_flags |= XFS_MOUNT_IKEEP; - } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { - mp->m_flags &= ~XFS_MOUNT_IKEEP; - } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { - mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE; - } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) { - mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; - } else if (!strcmp(this_char, MNTOPT_ATTR2)) { - mp->m_flags |= XFS_MOUNT_ATTR2; - } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { - mp->m_flags &= ~XFS_MOUNT_ATTR2; - mp->m_flags |= XFS_MOUNT_NOATTR2; - } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { - mp->m_flags |= XFS_MOUNT_FILESTREAMS; - } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { - mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | - XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | - XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | - XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD); - } else if (!strcmp(this_char, MNTOPT_QUOTA) || - !strcmp(this_char, MNTOPT_UQUOTA) || - !strcmp(this_char, MNTOPT_USRQUOTA)) { - mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | - XFS_UQUOTA_ENFD); - } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) || - !strcmp(this_char, MNTOPT_UQUOTANOENF)) { - mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); - mp->m_qflags &= ~XFS_UQUOTA_ENFD; - } else if (!strcmp(this_char, MNTOPT_PQUOTA) || - !strcmp(this_char, MNTOPT_PRJQUOTA)) { - mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | - XFS_OQUOTA_ENFD); - } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { - mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); - mp->m_qflags &= ~XFS_OQUOTA_ENFD; - } else if (!strcmp(this_char, MNTOPT_GQUOTA) || - !strcmp(this_char, MNTOPT_GRPQUOTA)) { - mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | - XFS_OQUOTA_ENFD); - } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { - mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); - mp->m_qflags &= ~XFS_OQUOTA_ENFD; - } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { - mp->m_flags |= XFS_MOUNT_DELAYLOG; - } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { - mp->m_flags &= ~XFS_MOUNT_DELAYLOG; - } else if (!strcmp(this_char, MNTOPT_DISCARD)) { - mp->m_flags |= XFS_MOUNT_DISCARD; - } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { - mp->m_flags &= ~XFS_MOUNT_DISCARD; - } else if (!strcmp(this_char, "ihashsize")) { - xfs_warn(mp, - "ihashsize no longer used, option is deprecated."); - } else if (!strcmp(this_char, "osyncisdsync")) { - xfs_warn(mp, - "osyncisdsync has no effect, option is deprecated."); - } else if (!strcmp(this_char, "osyncisosync")) { - xfs_warn(mp, - "osyncisosync has no effect, option is deprecated."); - } else if (!strcmp(this_char, "irixsgid")) { - xfs_warn(mp, - "irixsgid is now a sysctl(2) variable, option is deprecated."); - } else { - xfs_warn(mp, "unknown mount option [%s].", this_char); - return EINVAL; - } - } - - /* - * no recovery flag requires a read-only mount - */ - if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && - !(mp->m_flags & XFS_MOUNT_RDONLY)) { - xfs_warn(mp, "no-recovery mounts must be read-only."); - return EINVAL; - } - - if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { - xfs_warn(mp, - "sunit and swidth options incompatible with the noalign option"); - return EINVAL; - } - - if ((mp->m_flags & XFS_MOUNT_DISCARD) && - !(mp->m_flags & XFS_MOUNT_DELAYLOG)) { - xfs_warn(mp, - "the discard option is incompatible with the nodelaylog option"); - return EINVAL; - } - -#ifndef CONFIG_XFS_QUOTA - if (XFS_IS_QUOTA_RUNNING(mp)) { - xfs_warn(mp, "quota support not available in this kernel."); - return EINVAL; - } -#endif - - if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && - (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { - xfs_warn(mp, "cannot mount with both project and group quota"); - return EINVAL; - } - - if ((dsunit && !dswidth) || (!dsunit && dswidth)) { - xfs_warn(mp, "sunit and swidth must be specified together"); - return EINVAL; - } - - if (dsunit && (dswidth % dsunit != 0)) { - xfs_warn(mp, - "stripe width (%d) must be a multiple of the stripe unit (%d)", - dswidth, dsunit); - return EINVAL; - } - -done: - if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) { - /* - * At this point the superblock has not been read - * in, therefore we do not know the block size. - * Before the mount call ends we will convert - * these to FSBs. - */ - if (dsunit) { - mp->m_dalign = dsunit; - mp->m_flags |= XFS_MOUNT_RETERR; - } - - if (dswidth) - mp->m_swidth = dswidth; - } - - if (mp->m_logbufs != -1 && - mp->m_logbufs != 0 && - (mp->m_logbufs < XLOG_MIN_ICLOGS || - mp->m_logbufs > XLOG_MAX_ICLOGS)) { - xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", - mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); - return XFS_ERROR(EINVAL); - } - if (mp->m_logbsize != -1 && - mp->m_logbsize != 0 && - (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || - mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || - !is_power_of_2(mp->m_logbsize))) { - xfs_warn(mp, - "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", - mp->m_logbsize); - return XFS_ERROR(EINVAL); - } - - if (iosizelog) { - if (iosizelog > XFS_MAX_IO_LOG || - iosizelog < XFS_MIN_IO_LOG) { - xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", - iosizelog, XFS_MIN_IO_LOG, - XFS_MAX_IO_LOG); - return XFS_ERROR(EINVAL); - } - - mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE; - mp->m_readio_log = iosizelog; - mp->m_writeio_log = iosizelog; - } - - return 0; -} - -struct proc_xfs_info { - int flag; - char *str; -}; - -STATIC int -xfs_showargs( - struct xfs_mount *mp, - struct seq_file *m) -{ - static struct proc_xfs_info xfs_info_set[] = { - /* the few simple ones we can get from the mount struct */ - { XFS_MOUNT_IKEEP, "," MNTOPT_IKEEP }, - { XFS_MOUNT_WSYNC, "," MNTOPT_WSYNC }, - { XFS_MOUNT_NOALIGN, "," MNTOPT_NOALIGN }, - { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC }, - { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, - { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY }, - { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 }, - { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, - { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, - { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, - { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, - { 0, NULL } - }; - static struct proc_xfs_info xfs_info_unset[] = { - /* the few simple ones we can get from the mount struct */ - { XFS_MOUNT_COMPAT_IOSIZE, "," MNTOPT_LARGEIO }, - { XFS_MOUNT_BARRIER, "," MNTOPT_NOBARRIER }, - { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_64BITINODE }, - { 0, NULL } - }; - struct proc_xfs_info *xfs_infop; - - for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) { - if (mp->m_flags & xfs_infop->flag) - seq_puts(m, xfs_infop->str); - } - for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) { - if (!(mp->m_flags & xfs_infop->flag)) - seq_puts(m, xfs_infop->str); - } - - if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) - seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk", - (int)(1 << mp->m_writeio_log) >> 10); - - if (mp->m_logbufs > 0) - seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs); - if (mp->m_logbsize > 0) - seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10); - - if (mp->m_logname) - seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname); - if (mp->m_rtname) - seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname); - - if (mp->m_dalign > 0) - seq_printf(m, "," MNTOPT_SUNIT "=%d", - (int)XFS_FSB_TO_BB(mp, mp->m_dalign)); - if (mp->m_swidth > 0) - seq_printf(m, "," MNTOPT_SWIDTH "=%d", - (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); - - if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD)) - seq_puts(m, "," MNTOPT_USRQUOTA); - else if (mp->m_qflags & XFS_UQUOTA_ACCT) - seq_puts(m, "," MNTOPT_UQUOTANOENF); - - /* Either project or group quotas can be active, not both */ - - if (mp->m_qflags & XFS_PQUOTA_ACCT) { - if (mp->m_qflags & XFS_OQUOTA_ENFD) - seq_puts(m, "," MNTOPT_PRJQUOTA); - else - seq_puts(m, "," MNTOPT_PQUOTANOENF); - } else if (mp->m_qflags & XFS_GQUOTA_ACCT) { - if (mp->m_qflags & XFS_OQUOTA_ENFD) - seq_puts(m, "," MNTOPT_GRPQUOTA); - else - seq_puts(m, "," MNTOPT_GQUOTANOENF); - } - - if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) - seq_puts(m, "," MNTOPT_NOQUOTA); - - return 0; -} -__uint64_t -xfs_max_file_offset( - unsigned int blockshift) -{ - unsigned int pagefactor = 1; - unsigned int bitshift = BITS_PER_LONG - 1; - - /* Figure out maximum filesize, on Linux this can depend on - * the filesystem blocksize (on 32 bit platforms). - * __block_write_begin does this in an [unsigned] long... - * page->index << (PAGE_CACHE_SHIFT - bbits) - * So, for page sized blocks (4K on 32 bit platforms), - * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is - * (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) - * but for smaller blocksizes it is less (bbits = log2 bsize). - * Note1: get_block_t takes a long (implicit cast from above) - * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch - * can optionally convert the [unsigned] long from above into - * an [unsigned] long long. - */ - -#if BITS_PER_LONG == 32 -# if defined(CONFIG_LBDAF) - ASSERT(sizeof(sector_t) == 8); - pagefactor = PAGE_CACHE_SIZE; - bitshift = BITS_PER_LONG; -# else - pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift); -# endif -#endif - - return (((__uint64_t)pagefactor) << bitshift) - 1; -} - -STATIC int -xfs_blkdev_get( - xfs_mount_t *mp, - const char *name, - struct block_device **bdevp) -{ - int error = 0; - - *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL, - mp); - if (IS_ERR(*bdevp)) { - error = PTR_ERR(*bdevp); - xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error); - } - - return -error; -} - -STATIC void -xfs_blkdev_put( - struct block_device *bdev) -{ - if (bdev) - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); -} - -void -xfs_blkdev_issue_flush( - xfs_buftarg_t *buftarg) -{ - blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL); -} - -STATIC void -xfs_close_devices( - struct xfs_mount *mp) -{ - if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { - struct block_device *logdev = mp->m_logdev_targp->bt_bdev; - xfs_free_buftarg(mp, mp->m_logdev_targp); - xfs_blkdev_put(logdev); - } - if (mp->m_rtdev_targp) { - struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev; - xfs_free_buftarg(mp, mp->m_rtdev_targp); - xfs_blkdev_put(rtdev); - } - xfs_free_buftarg(mp, mp->m_ddev_targp); -} - -/* - * The file system configurations are: - * (1) device (partition) with data and internal log - * (2) logical volume with data and log subvolumes. - * (3) logical volume with data, log, and realtime subvolumes. - * - * We only have to handle opening the log and realtime volumes here if - * they are present. The data subvolume has already been opened by - * get_sb_bdev() and is stored in sb->s_bdev. - */ -STATIC int -xfs_open_devices( - struct xfs_mount *mp) -{ - struct block_device *ddev = mp->m_super->s_bdev; - struct block_device *logdev = NULL, *rtdev = NULL; - int error; - - /* - * Open real time and log devices - order is important. - */ - if (mp->m_logname) { - error = xfs_blkdev_get(mp, mp->m_logname, &logdev); - if (error) - goto out; - } - - if (mp->m_rtname) { - error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev); - if (error) - goto out_close_logdev; - - if (rtdev == ddev || rtdev == logdev) { - xfs_warn(mp, - "Cannot mount filesystem with identical rtdev and ddev/logdev."); - error = EINVAL; - goto out_close_rtdev; - } - } - - /* - * Setup xfs_mount buffer target pointers - */ - error = ENOMEM; - mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname); - if (!mp->m_ddev_targp) - goto out_close_rtdev; - - if (rtdev) { - mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1, - mp->m_fsname); - if (!mp->m_rtdev_targp) - goto out_free_ddev_targ; - } - - if (logdev && logdev != ddev) { - mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1, - mp->m_fsname); - if (!mp->m_logdev_targp) - goto out_free_rtdev_targ; - } else { - mp->m_logdev_targp = mp->m_ddev_targp; - } - - return 0; - - out_free_rtdev_targ: - if (mp->m_rtdev_targp) - xfs_free_buftarg(mp, mp->m_rtdev_targp); - out_free_ddev_targ: - xfs_free_buftarg(mp, mp->m_ddev_targp); - out_close_rtdev: - if (rtdev) - xfs_blkdev_put(rtdev); - out_close_logdev: - if (logdev && logdev != ddev) - xfs_blkdev_put(logdev); - out: - return error; -} - -/* - * Setup xfs_mount buffer target pointers based on superblock - */ -STATIC int -xfs_setup_devices( - struct xfs_mount *mp) -{ - int error; - - error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize, - mp->m_sb.sb_sectsize); - if (error) - return error; - - if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { - unsigned int log_sector_size = BBSIZE; - - if (xfs_sb_version_hassector(&mp->m_sb)) - log_sector_size = mp->m_sb.sb_logsectsize; - error = xfs_setsize_buftarg(mp->m_logdev_targp, - mp->m_sb.sb_blocksize, - log_sector_size); - if (error) - return error; - } - if (mp->m_rtdev_targp) { - error = xfs_setsize_buftarg(mp->m_rtdev_targp, - mp->m_sb.sb_blocksize, - mp->m_sb.sb_sectsize); - if (error) - return error; - } - - return 0; -} - -/* Catch misguided souls that try to use this interface on XFS */ -STATIC struct inode * -xfs_fs_alloc_inode( - struct super_block *sb) -{ - BUG(); - return NULL; -} - -/* - * Now that the generic code is guaranteed not to be accessing - * the linux inode, we can reclaim the inode. - */ -STATIC void -xfs_fs_destroy_inode( - struct inode *inode) -{ - struct xfs_inode *ip = XFS_I(inode); - - trace_xfs_destroy_inode(ip); - - XFS_STATS_INC(vn_reclaim); - - /* bad inode, get out here ASAP */ - if (is_bad_inode(inode)) - goto out_reclaim; - - xfs_ioend_wait(ip); - - ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); - - /* - * We should never get here with one of the reclaim flags already set. - */ - ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE)); - ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); - - /* - * We always use background reclaim here because even if the - * inode is clean, it still may be under IO and hence we have - * to take the flush lock. The background reclaim path handles - * this more efficiently than we can here, so simply let background - * reclaim tear down all inodes. - */ -out_reclaim: - xfs_inode_set_reclaim_tag(ip); -} - -/* - * Slab object creation initialisation for the XFS inode. - * This covers only the idempotent fields in the XFS inode; - * all other fields need to be initialised on allocation - * from the slab. This avoids the need to repeatedly initialise - * fields in the xfs inode that left in the initialise state - * when freeing the inode. - */ -STATIC void -xfs_fs_inode_init_once( - void *inode) -{ - struct xfs_inode *ip = inode; - - memset(ip, 0, sizeof(struct xfs_inode)); - - /* vfs inode */ - inode_init_once(VFS_I(ip)); - - /* xfs inode */ - atomic_set(&ip->i_iocount, 0); - atomic_set(&ip->i_pincount, 0); - spin_lock_init(&ip->i_flags_lock); - init_waitqueue_head(&ip->i_ipin_wait); - /* - * Because we want to use a counting completion, complete - * the flush completion once to allow a single access to - * the flush completion without blocking. - */ - init_completion(&ip->i_flush); - complete(&ip->i_flush); - - mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, - "xfsino", ip->i_ino); -} - -/* - * Dirty the XFS inode when mark_inode_dirty_sync() is called so that - * we catch unlogged VFS level updates to the inode. - * - * We need the barrier() to maintain correct ordering between unlogged - * updates and the transaction commit code that clears the i_update_core - * field. This requires all updates to be completed before marking the - * inode dirty. - */ -STATIC void -xfs_fs_dirty_inode( - struct inode *inode, - int flags) -{ - barrier(); - XFS_I(inode)->i_update_core = 1; -} - -STATIC int -xfs_log_inode( - struct xfs_inode *ip) -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_trans *tp; - int error; - - xfs_iunlock(ip, XFS_ILOCK_SHARED); - tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); - error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); - - if (error) { - xfs_trans_cancel(tp, 0); - /* we need to return with the lock hold shared */ - xfs_ilock(ip, XFS_ILOCK_SHARED); - return error; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - - /* - * Note - it's possible that we might have pushed ourselves out of the - * way during trans_reserve which would flush the inode. But there's - * no guarantee that the inode buffer has actually gone out yet (it's - * delwri). Plus the buffer could be pinned anyway if it's part of - * an inode in another recent transaction. So we play it safe and - * fire off the transaction anyway. - */ - xfs_trans_ijoin(tp, ip); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_trans_commit(tp, 0); - xfs_ilock_demote(ip, XFS_ILOCK_EXCL); - - return error; -} - -STATIC int -xfs_fs_write_inode( - struct inode *inode, - struct writeback_control *wbc) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - int error = EAGAIN; - - trace_xfs_write_inode(ip); - - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); - - if (wbc->sync_mode == WB_SYNC_ALL) { - /* - * Make sure the inode has made it it into the log. Instead - * of forcing it all the way to stable storage using a - * synchronous transaction we let the log force inside the - * ->sync_fs call do that for thus, which reduces the number - * of synchronous log foces dramatically. - */ - xfs_ioend_wait(ip); - xfs_ilock(ip, XFS_ILOCK_SHARED); - if (ip->i_update_core) { - error = xfs_log_inode(ip); - if (error) - goto out_unlock; - } - } else { - /* - * We make this non-blocking if the inode is contended, return - * EAGAIN to indicate to the caller that they did not succeed. - * This prevents the flush path from blocking on inodes inside - * another operation right now, they get caught later by - * xfs_sync. - */ - if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) - goto out; - - if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) - goto out_unlock; - - /* - * Now we have the flush lock and the inode is not pinned, we - * can check if the inode is really clean as we know that - * there are no pending transaction completions, it is not - * waiting on the delayed write queue and there is no IO in - * progress. - */ - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - error = 0; - goto out_unlock; - } - error = xfs_iflush(ip, SYNC_TRYLOCK); - } - - out_unlock: - xfs_iunlock(ip, XFS_ILOCK_SHARED); - out: - /* - * if we failed to write out the inode then mark - * it dirty again so we'll try again later. - */ - if (error) - xfs_mark_inode_dirty_sync(ip); - return -error; -} - -STATIC void -xfs_fs_evict_inode( - struct inode *inode) -{ - xfs_inode_t *ip = XFS_I(inode); - - trace_xfs_evict_inode(ip); - - truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); - XFS_STATS_INC(vn_rele); - XFS_STATS_INC(vn_remove); - XFS_STATS_DEC(vn_active); - - /* - * The iolock is used by the file system to coordinate reads, - * writes, and block truncates. Up to this point the lock - * protected concurrent accesses by users of the inode. But - * from here forward we're doing some final processing of the - * inode because we're done with it, and although we reuse the - * iolock for protection it is really a distinct lock class - * (in the lockdep sense) from before. To keep lockdep happy - * (and basically indicate what we are doing), we explicitly - * re-init the iolock here. - */ - ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); - mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); - lockdep_set_class_and_name(&ip->i_iolock.mr_lock, - &xfs_iolock_reclaimable, "xfs_iolock_reclaimable"); - - xfs_inactive(ip); -} - -STATIC void -xfs_free_fsname( - struct xfs_mount *mp) -{ - kfree(mp->m_fsname); - kfree(mp->m_rtname); - kfree(mp->m_logname); -} - -STATIC void -xfs_fs_put_super( - struct super_block *sb) -{ - struct xfs_mount *mp = XFS_M(sb); - - xfs_syncd_stop(mp); - - /* - * Blow away any referenced inode in the filestreams cache. - * This can and will cause log traffic as inodes go inactive - * here. - */ - xfs_filestream_unmount(mp); - - XFS_bflush(mp->m_ddev_targp); - - xfs_unmountfs(mp); - xfs_freesb(mp); - xfs_icsb_destroy_counters(mp); - xfs_close_devices(mp); - xfs_free_fsname(mp); - kfree(mp); -} - -STATIC int -xfs_fs_sync_fs( - struct super_block *sb, - int wait) -{ - struct xfs_mount *mp = XFS_M(sb); - int error; - - /* - * Not much we can do for the first async pass. Writing out the - * superblock would be counter-productive as we are going to redirty - * when writing out other data and metadata (and writing out a single - * block is quite fast anyway). - * - * Try to asynchronously kick off quota syncing at least. - */ - if (!wait) { - xfs_qm_sync(mp, SYNC_TRYLOCK); - return 0; - } - - error = xfs_quiesce_data(mp); - if (error) - return -error; - - if (laptop_mode) { - /* - * The disk must be active because we're syncing. - * We schedule xfssyncd now (now that the disk is - * active) instead of later (when it might not be). - */ - flush_delayed_work_sync(&mp->m_sync_work); - } - - return 0; -} - -STATIC int -xfs_fs_statfs( - struct dentry *dentry, - struct kstatfs *statp) -{ - struct xfs_mount *mp = XFS_M(dentry->d_sb); - xfs_sb_t *sbp = &mp->m_sb; - struct xfs_inode *ip = XFS_I(dentry->d_inode); - __uint64_t fakeinos, id; - xfs_extlen_t lsize; - __int64_t ffree; - - statp->f_type = XFS_SB_MAGIC; - statp->f_namelen = MAXNAMELEN - 1; - - id = huge_encode_dev(mp->m_ddev_targp->bt_dev); - statp->f_fsid.val[0] = (u32)id; - statp->f_fsid.val[1] = (u32)(id >> 32); - - xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); - - spin_lock(&mp->m_sb_lock); - statp->f_bsize = sbp->sb_blocksize; - lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; - statp->f_blocks = sbp->sb_dblocks - lsize; - statp->f_bfree = statp->f_bavail = - sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); - fakeinos = statp->f_bfree << sbp->sb_inopblog; - statp->f_files = - MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER); - if (mp->m_maxicount) - statp->f_files = min_t(typeof(statp->f_files), - statp->f_files, - mp->m_maxicount); - - /* make sure statp->f_ffree does not underflow */ - ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); - statp->f_ffree = max_t(__int64_t, ffree, 0); - - spin_unlock(&mp->m_sb_lock); - - if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || - ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == - (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) - xfs_qm_statvfs(ip, statp); - return 0; -} - -STATIC void -xfs_save_resvblks(struct xfs_mount *mp) -{ - __uint64_t resblks = 0; - - mp->m_resblks_save = mp->m_resblks; - xfs_reserve_blocks(mp, &resblks, NULL); -} - -STATIC void -xfs_restore_resvblks(struct xfs_mount *mp) -{ - __uint64_t resblks; - - if (mp->m_resblks_save) { - resblks = mp->m_resblks_save; - mp->m_resblks_save = 0; - } else - resblks = xfs_default_resblks(mp); - - xfs_reserve_blocks(mp, &resblks, NULL); -} - -STATIC int -xfs_fs_remount( - struct super_block *sb, - int *flags, - char *options) -{ - struct xfs_mount *mp = XFS_M(sb); - substring_t args[MAX_OPT_ARGS]; - char *p; - int error; - - while ((p = strsep(&options, ",")) != NULL) { - int token; - - if (!*p) - continue; - - token = match_token(p, tokens, args); - switch (token) { - case Opt_barrier: - mp->m_flags |= XFS_MOUNT_BARRIER; - break; - case Opt_nobarrier: - mp->m_flags &= ~XFS_MOUNT_BARRIER; - break; - default: - /* - * Logically we would return an error here to prevent - * users from believing they might have changed - * mount options using remount which can't be changed. - * - * But unfortunately mount(8) adds all options from - * mtab and fstab to the mount arguments in some cases - * so we can't blindly reject options, but have to - * check for each specified option if it actually - * differs from the currently set option and only - * reject it if that's the case. - * - * Until that is implemented we return success for - * every remount request, and silently ignore all - * options that we can't actually change. - */ -#if 0 - xfs_info(mp, - "mount option \"%s\" not supported for remount\n", p); - return -EINVAL; -#else - break; -#endif - } - } - - /* ro -> rw */ - if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { - mp->m_flags &= ~XFS_MOUNT_RDONLY; - - /* - * If this is the first remount to writeable state we - * might have some superblock changes to update. - */ - if (mp->m_update_flags) { - error = xfs_mount_log_sb(mp, mp->m_update_flags); - if (error) { - xfs_warn(mp, "failed to write sb changes"); - return error; - } - mp->m_update_flags = 0; - } - - /* - * Fill out the reserve pool if it is empty. Use the stashed - * value if it is non-zero, otherwise go with the default. - */ - xfs_restore_resvblks(mp); - } - - /* rw -> ro */ - if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { - /* - * After we have synced the data but before we sync the - * metadata, we need to free up the reserve block pool so that - * the used block count in the superblock on disk is correct at - * the end of the remount. Stash the current reserve pool size - * so that if we get remounted rw, we can return it to the same - * size. - */ - - xfs_quiesce_data(mp); - xfs_save_resvblks(mp); - xfs_quiesce_attr(mp); - mp->m_flags |= XFS_MOUNT_RDONLY; - } - - return 0; -} - -/* - * Second stage of a freeze. The data is already frozen so we only - * need to take care of the metadata. Once that's done write a dummy - * record to dirty the log in case of a crash while frozen. - */ -STATIC int -xfs_fs_freeze( - struct super_block *sb) -{ - struct xfs_mount *mp = XFS_M(sb); - - xfs_save_resvblks(mp); - xfs_quiesce_attr(mp); - return -xfs_fs_log_dummy(mp); -} - -STATIC int -xfs_fs_unfreeze( - struct super_block *sb) -{ - struct xfs_mount *mp = XFS_M(sb); - - xfs_restore_resvblks(mp); - return 0; -} - -STATIC int -xfs_fs_show_options( - struct seq_file *m, - struct vfsmount *mnt) -{ - return -xfs_showargs(XFS_M(mnt->mnt_sb), m); -} - -/* - * This function fills in xfs_mount_t fields based on mount args. - * Note: the superblock _has_ now been read in. - */ -STATIC int -xfs_finish_flags( - struct xfs_mount *mp) -{ - int ronly = (mp->m_flags & XFS_MOUNT_RDONLY); - - /* Fail a mount where the logbuf is smaller than the log stripe */ - if (xfs_sb_version_haslogv2(&mp->m_sb)) { - if (mp->m_logbsize <= 0 && - mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) { - mp->m_logbsize = mp->m_sb.sb_logsunit; - } else if (mp->m_logbsize > 0 && - mp->m_logbsize < mp->m_sb.sb_logsunit) { - xfs_warn(mp, - "logbuf size must be greater than or equal to log stripe size"); - return XFS_ERROR(EINVAL); - } - } else { - /* Fail a mount if the logbuf is larger than 32K */ - if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { - xfs_warn(mp, - "logbuf size for version 1 logs must be 16K or 32K"); - return XFS_ERROR(EINVAL); - } - } - - /* - * mkfs'ed attr2 will turn on attr2 mount unless explicitly - * told by noattr2 to turn it off - */ - if (xfs_sb_version_hasattr2(&mp->m_sb) && - !(mp->m_flags & XFS_MOUNT_NOATTR2)) - mp->m_flags |= XFS_MOUNT_ATTR2; - - /* - * prohibit r/w mounts of read-only filesystems - */ - if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { - xfs_warn(mp, - "cannot mount a read-only filesystem as read-write"); - return XFS_ERROR(EROFS); - } - - return 0; -} - -STATIC int -xfs_fs_fill_super( - struct super_block *sb, - void *data, - int silent) -{ - struct inode *root; - struct xfs_mount *mp = NULL; - int flags = 0, error = ENOMEM; - - mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); - if (!mp) - goto out; - - spin_lock_init(&mp->m_sb_lock); - mutex_init(&mp->m_growlock); - atomic_set(&mp->m_active_trans, 0); - - mp->m_super = sb; - sb->s_fs_info = mp; - - error = xfs_parseargs(mp, (char *)data); - if (error) - goto out_free_fsname; - - sb_min_blocksize(sb, BBSIZE); - sb->s_xattr = xfs_xattr_handlers; - sb->s_export_op = &xfs_export_operations; -#ifdef CONFIG_XFS_QUOTA - sb->s_qcop = &xfs_quotactl_operations; -#endif - sb->s_op = &xfs_super_operations; - - if (silent) - flags |= XFS_MFSI_QUIET; - - error = xfs_open_devices(mp); - if (error) - goto out_free_fsname; - - error = xfs_icsb_init_counters(mp); - if (error) - goto out_close_devices; - - error = xfs_readsb(mp, flags); - if (error) - goto out_destroy_counters; - - error = xfs_finish_flags(mp); - if (error) - goto out_free_sb; - - error = xfs_setup_devices(mp); - if (error) - goto out_free_sb; - - error = xfs_filestream_mount(mp); - if (error) - goto out_free_sb; - - /* - * we must configure the block size in the superblock before we run the - * full mount process as the mount process can lookup and cache inodes. - * For the same reason we must also initialise the syncd and register - * the inode cache shrinker so that inodes can be reclaimed during - * operations like a quotacheck that iterate all inodes in the - * filesystem. - */ - sb->s_magic = XFS_SB_MAGIC; - sb->s_blocksize = mp->m_sb.sb_blocksize; - sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; - sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); - sb->s_time_gran = 1; - set_posix_acl_flag(sb); - - error = xfs_mountfs(mp); - if (error) - goto out_filestream_unmount; - - error = xfs_syncd_init(mp); - if (error) - goto out_unmount; - - root = igrab(VFS_I(mp->m_rootip)); - if (!root) { - error = ENOENT; - goto out_syncd_stop; - } - if (is_bad_inode(root)) { - error = EINVAL; - goto out_syncd_stop; - } - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { - error = ENOMEM; - goto out_iput; - } - - return 0; - - out_filestream_unmount: - xfs_filestream_unmount(mp); - out_free_sb: - xfs_freesb(mp); - out_destroy_counters: - xfs_icsb_destroy_counters(mp); - out_close_devices: - xfs_close_devices(mp); - out_free_fsname: - xfs_free_fsname(mp); - kfree(mp); - out: - return -error; - - out_iput: - iput(root); - out_syncd_stop: - xfs_syncd_stop(mp); - out_unmount: - /* - * Blow away any referenced inode in the filestreams cache. - * This can and will cause log traffic as inodes go inactive - * here. - */ - xfs_filestream_unmount(mp); - - XFS_bflush(mp->m_ddev_targp); - - xfs_unmountfs(mp); - goto out_free_sb; -} - -STATIC struct dentry * -xfs_fs_mount( - struct file_system_type *fs_type, - int flags, - const char *dev_name, - void *data) -{ - return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); -} - -static int -xfs_fs_nr_cached_objects( - struct super_block *sb) -{ - return xfs_reclaim_inodes_count(XFS_M(sb)); -} - -static void -xfs_fs_free_cached_objects( - struct super_block *sb, - int nr_to_scan) -{ - xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan); -} - -static const struct super_operations xfs_super_operations = { - .alloc_inode = xfs_fs_alloc_inode, - .destroy_inode = xfs_fs_destroy_inode, - .dirty_inode = xfs_fs_dirty_inode, - .write_inode = xfs_fs_write_inode, - .evict_inode = xfs_fs_evict_inode, - .put_super = xfs_fs_put_super, - .sync_fs = xfs_fs_sync_fs, - .freeze_fs = xfs_fs_freeze, - .unfreeze_fs = xfs_fs_unfreeze, - .statfs = xfs_fs_statfs, - .remount_fs = xfs_fs_remount, - .show_options = xfs_fs_show_options, - .nr_cached_objects = xfs_fs_nr_cached_objects, - .free_cached_objects = xfs_fs_free_cached_objects, -}; - -static struct file_system_type xfs_fs_type = { - .owner = THIS_MODULE, - .name = "xfs", - .mount = xfs_fs_mount, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, -}; - -STATIC int __init -xfs_init_zones(void) -{ - - xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend"); - if (!xfs_ioend_zone) - goto out; - - xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE, - xfs_ioend_zone); - if (!xfs_ioend_pool) - goto out_destroy_ioend_zone; - - xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t), - "xfs_log_ticket"); - if (!xfs_log_ticket_zone) - goto out_destroy_ioend_pool; - - xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t), - "xfs_bmap_free_item"); - if (!xfs_bmap_free_item_zone) - goto out_destroy_log_ticket_zone; - - xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), - "xfs_btree_cur"); - if (!xfs_btree_cur_zone) - goto out_destroy_bmap_free_item_zone; - - xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t), - "xfs_da_state"); - if (!xfs_da_state_zone) - goto out_destroy_btree_cur_zone; - - xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); - if (!xfs_dabuf_zone) - goto out_destroy_da_state_zone; - - xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); - if (!xfs_ifork_zone) - goto out_destroy_dabuf_zone; - - xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); - if (!xfs_trans_zone) - goto out_destroy_ifork_zone; - - xfs_log_item_desc_zone = - kmem_zone_init(sizeof(struct xfs_log_item_desc), - "xfs_log_item_desc"); - if (!xfs_log_item_desc_zone) - goto out_destroy_trans_zone; - - /* - * The size of the zone allocated buf log item is the maximum - * size possible under XFS. This wastes a little bit of memory, - * but it is much faster. - */ - xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) + - (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / - NBWORD) * sizeof(int))), "xfs_buf_item"); - if (!xfs_buf_item_zone) - goto out_destroy_log_item_desc_zone; - - xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) + - ((XFS_EFD_MAX_FAST_EXTENTS - 1) * - sizeof(xfs_extent_t))), "xfs_efd_item"); - if (!xfs_efd_zone) - goto out_destroy_buf_item_zone; - - xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) + - ((XFS_EFI_MAX_FAST_EXTENTS - 1) * - sizeof(xfs_extent_t))), "xfs_efi_item"); - if (!xfs_efi_zone) - goto out_destroy_efd_zone; - - xfs_inode_zone = - kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", - KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD, - xfs_fs_inode_init_once); - if (!xfs_inode_zone) - goto out_destroy_efi_zone; - - xfs_ili_zone = - kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", - KM_ZONE_SPREAD, NULL); - if (!xfs_ili_zone) - goto out_destroy_inode_zone; - - return 0; - - out_destroy_inode_zone: - kmem_zone_destroy(xfs_inode_zone); - out_destroy_efi_zone: - kmem_zone_destroy(xfs_efi_zone); - out_destroy_efd_zone: - kmem_zone_destroy(xfs_efd_zone); - out_destroy_buf_item_zone: - kmem_zone_destroy(xfs_buf_item_zone); - out_destroy_log_item_desc_zone: - kmem_zone_destroy(xfs_log_item_desc_zone); - out_destroy_trans_zone: - kmem_zone_destroy(xfs_trans_zone); - out_destroy_ifork_zone: - kmem_zone_destroy(xfs_ifork_zone); - out_destroy_dabuf_zone: - kmem_zone_destroy(xfs_dabuf_zone); - out_destroy_da_state_zone: - kmem_zone_destroy(xfs_da_state_zone); - out_destroy_btree_cur_zone: - kmem_zone_destroy(xfs_btree_cur_zone); - out_destroy_bmap_free_item_zone: - kmem_zone_destroy(xfs_bmap_free_item_zone); - out_destroy_log_ticket_zone: - kmem_zone_destroy(xfs_log_ticket_zone); - out_destroy_ioend_pool: - mempool_destroy(xfs_ioend_pool); - out_destroy_ioend_zone: - kmem_zone_destroy(xfs_ioend_zone); - out: - return -ENOMEM; -} - -STATIC void -xfs_destroy_zones(void) -{ - kmem_zone_destroy(xfs_ili_zone); - kmem_zone_destroy(xfs_inode_zone); - kmem_zone_destroy(xfs_efi_zone); - kmem_zone_destroy(xfs_efd_zone); - kmem_zone_destroy(xfs_buf_item_zone); - kmem_zone_destroy(xfs_log_item_desc_zone); - kmem_zone_destroy(xfs_trans_zone); - kmem_zone_destroy(xfs_ifork_zone); - kmem_zone_destroy(xfs_dabuf_zone); - kmem_zone_destroy(xfs_da_state_zone); - kmem_zone_destroy(xfs_btree_cur_zone); - kmem_zone_destroy(xfs_bmap_free_item_zone); - kmem_zone_destroy(xfs_log_ticket_zone); - mempool_destroy(xfs_ioend_pool); - kmem_zone_destroy(xfs_ioend_zone); - -} - -STATIC int __init -xfs_init_workqueues(void) -{ - /* - * max_active is set to 8 to give enough concurency to allow - * multiple work operations on each CPU to run. This allows multiple - * filesystems to be running sync work concurrently, and scales with - * the number of CPUs in the system. - */ - xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); - if (!xfs_syncd_wq) - goto out; - - xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8); - if (!xfs_ail_wq) - goto out_destroy_syncd; - - return 0; - -out_destroy_syncd: - destroy_workqueue(xfs_syncd_wq); -out: - return -ENOMEM; -} - -STATIC void -xfs_destroy_workqueues(void) -{ - destroy_workqueue(xfs_ail_wq); - destroy_workqueue(xfs_syncd_wq); -} - -STATIC int __init -init_xfs_fs(void) -{ - int error; - - printk(KERN_INFO XFS_VERSION_STRING " with " - XFS_BUILD_OPTIONS " enabled\n"); - - xfs_ioend_init(); - xfs_dir_startup(); - - error = xfs_init_zones(); - if (error) - goto out; - - error = xfs_init_workqueues(); - if (error) - goto out_destroy_zones; - - error = xfs_mru_cache_init(); - if (error) - goto out_destroy_wq; - - error = xfs_filestream_init(); - if (error) - goto out_mru_cache_uninit; - - error = xfs_buf_init(); - if (error) - goto out_filestream_uninit; - - error = xfs_init_procfs(); - if (error) - goto out_buf_terminate; - - error = xfs_sysctl_register(); - if (error) - goto out_cleanup_procfs; - - vfs_initquota(); - - error = register_filesystem(&xfs_fs_type); - if (error) - goto out_sysctl_unregister; - return 0; - - out_sysctl_unregister: - xfs_sysctl_unregister(); - out_cleanup_procfs: - xfs_cleanup_procfs(); - out_buf_terminate: - xfs_buf_terminate(); - out_filestream_uninit: - xfs_filestream_uninit(); - out_mru_cache_uninit: - xfs_mru_cache_uninit(); - out_destroy_wq: - xfs_destroy_workqueues(); - out_destroy_zones: - xfs_destroy_zones(); - out: - return error; -} - -STATIC void __exit -exit_xfs_fs(void) -{ - vfs_exitquota(); - unregister_filesystem(&xfs_fs_type); - xfs_sysctl_unregister(); - xfs_cleanup_procfs(); - xfs_buf_terminate(); - xfs_filestream_uninit(); - xfs_mru_cache_uninit(); - xfs_destroy_workqueues(); - xfs_destroy_zones(); -} - -module_init(init_xfs_fs); -module_exit(exit_xfs_fs); - -MODULE_AUTHOR("Silicon Graphics, Inc."); -MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled"); -MODULE_LICENSE("GPL"); diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h deleted file mode 100644 index 50a3266..0000000 --- a/fs/xfs/linux-2.6/xfs_super.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPER_H__ -#define __XFS_SUPER_H__ - -#include - -#ifdef CONFIG_XFS_QUOTA -extern void xfs_qm_init(void); -extern void xfs_qm_exit(void); -# define vfs_initquota() xfs_qm_init() -# define vfs_exitquota() xfs_qm_exit() -#else -# define vfs_initquota() do { } while (0) -# define vfs_exitquota() do { } while (0) -#endif - -#ifdef CONFIG_XFS_POSIX_ACL -# define XFS_ACL_STRING "ACLs, " -# define set_posix_acl_flag(sb) ((sb)->s_flags |= MS_POSIXACL) -#else -# define XFS_ACL_STRING -# define set_posix_acl_flag(sb) do { } while (0) -#endif - -#define XFS_SECURITY_STRING "security attributes, " - -#ifdef CONFIG_XFS_RT -# define XFS_REALTIME_STRING "realtime, " -#else -# define XFS_REALTIME_STRING -#endif - -#if XFS_BIG_BLKNOS -# if XFS_BIG_INUMS -# define XFS_BIGFS_STRING "large block/inode numbers, " -# else -# define XFS_BIGFS_STRING "large block numbers, " -# endif -#else -# define XFS_BIGFS_STRING -#endif - -#ifdef DEBUG -# define XFS_DBG_STRING "debug" -#else -# define XFS_DBG_STRING "no debug" -#endif - -#define XFS_VERSION_STRING "SGI XFS" -#define XFS_BUILD_OPTIONS XFS_ACL_STRING \ - XFS_SECURITY_STRING \ - XFS_REALTIME_STRING \ - XFS_BIGFS_STRING \ - XFS_DBG_STRING /* DBG must be last */ - -struct xfs_inode; -struct xfs_mount; -struct xfs_buftarg; -struct block_device; - -extern __uint64_t xfs_max_file_offset(unsigned int); - -extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); - -extern const struct export_operations xfs_export_operations; -extern const struct xattr_handler *xfs_xattr_handlers[]; -extern const struct quotactl_ops xfs_quotactl_operations; - -#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) - -#endif /* __XFS_SUPER_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c deleted file mode 100644 index 4604f90..0000000 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ /dev/null @@ -1,1065 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_dinode.h" -#include "xfs_error.h" -#include "xfs_filestream.h" -#include "xfs_vnodeops.h" -#include "xfs_inode_item.h" -#include "xfs_quota.h" -#include "xfs_trace.h" -#include "xfs_fsops.h" - -#include -#include - -struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ - -/* - * The inode lookup is done in batches to keep the amount of lock traffic and - * radix tree lookups to a minimum. The batch size is a trade off between - * lookup reduction and stack usage. This is in the reclaim path, so we can't - * be too greedy. - */ -#define XFS_LOOKUP_BATCH 32 - -STATIC int -xfs_inode_ag_walk_grab( - struct xfs_inode *ip) -{ - struct inode *inode = VFS_I(ip); - - ASSERT(rcu_read_lock_held()); - - /* - * check for stale RCU freed inode - * - * If the inode has been reallocated, it doesn't matter if it's not in - * the AG we are walking - we are walking for writeback, so if it - * passes all the "valid inode" checks and is dirty, then we'll write - * it back anyway. If it has been reallocated and still being - * initialised, the XFS_INEW check below will catch it. - */ - spin_lock(&ip->i_flags_lock); - if (!ip->i_ino) - goto out_unlock_noent; - - /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ - if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) - goto out_unlock_noent; - spin_unlock(&ip->i_flags_lock); - - /* nothing to sync during shutdown */ - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return EFSCORRUPTED; - - /* If we can't grab the inode, it must on it's way to reclaim. */ - if (!igrab(inode)) - return ENOENT; - - if (is_bad_inode(inode)) { - IRELE(ip); - return ENOENT; - } - - /* inode is valid */ - return 0; - -out_unlock_noent: - spin_unlock(&ip->i_flags_lock); - return ENOENT; -} - -STATIC int -xfs_inode_ag_walk( - struct xfs_mount *mp, - struct xfs_perag *pag, - int (*execute)(struct xfs_inode *ip, - struct xfs_perag *pag, int flags), - int flags) -{ - uint32_t first_index; - int last_error = 0; - int skipped; - int done; - int nr_found; - -restart: - done = 0; - skipped = 0; - first_index = 0; - nr_found = 0; - do { - struct xfs_inode *batch[XFS_LOOKUP_BATCH]; - int error = 0; - int i; - - rcu_read_lock(); - nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, - (void **)batch, first_index, - XFS_LOOKUP_BATCH); - if (!nr_found) { - rcu_read_unlock(); - break; - } - - /* - * Grab the inodes before we drop the lock. if we found - * nothing, nr == 0 and the loop will be skipped. - */ - for (i = 0; i < nr_found; i++) { - struct xfs_inode *ip = batch[i]; - - if (done || xfs_inode_ag_walk_grab(ip)) - batch[i] = NULL; - - /* - * Update the index for the next lookup. Catch - * overflows into the next AG range which can occur if - * we have inodes in the last block of the AG and we - * are currently pointing to the last inode. - * - * Because we may see inodes that are from the wrong AG - * due to RCU freeing and reallocation, only update the - * index if it lies in this AG. It was a race that lead - * us to see this inode, so another lookup from the - * same index will not find it again. - */ - if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno) - continue; - first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); - if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) - done = 1; - } - - /* unlock now we've grabbed the inodes. */ - rcu_read_unlock(); - - for (i = 0; i < nr_found; i++) { - if (!batch[i]) - continue; - error = execute(batch[i], pag, flags); - IRELE(batch[i]); - if (error == EAGAIN) { - skipped++; - continue; - } - if (error && last_error != EFSCORRUPTED) - last_error = error; - } - - /* bail out if the filesystem is corrupted. */ - if (error == EFSCORRUPTED) - break; - - cond_resched(); - - } while (nr_found && !done); - - if (skipped) { - delay(1); - goto restart; - } - return last_error; -} - -int -xfs_inode_ag_iterator( - struct xfs_mount *mp, - int (*execute)(struct xfs_inode *ip, - struct xfs_perag *pag, int flags), - int flags) -{ - struct xfs_perag *pag; - int error = 0; - int last_error = 0; - xfs_agnumber_t ag; - - ag = 0; - while ((pag = xfs_perag_get(mp, ag))) { - ag = pag->pag_agno + 1; - error = xfs_inode_ag_walk(mp, pag, execute, flags); - xfs_perag_put(pag); - if (error) { - last_error = error; - if (error == EFSCORRUPTED) - break; - } - } - return XFS_ERROR(last_error); -} - -STATIC int -xfs_sync_inode_data( - struct xfs_inode *ip, - struct xfs_perag *pag, - int flags) -{ - struct inode *inode = VFS_I(ip); - struct address_space *mapping = inode->i_mapping; - int error = 0; - - if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) - goto out_wait; - - if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { - if (flags & SYNC_TRYLOCK) - goto out_wait; - xfs_ilock(ip, XFS_IOLOCK_SHARED); - } - - error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? - 0 : XBF_ASYNC, FI_NONE); - xfs_iunlock(ip, XFS_IOLOCK_SHARED); - - out_wait: - if (flags & SYNC_WAIT) - xfs_ioend_wait(ip); - return error; -} - -STATIC int -xfs_sync_inode_attr( - struct xfs_inode *ip, - struct xfs_perag *pag, - int flags) -{ - int error = 0; - - xfs_ilock(ip, XFS_ILOCK_SHARED); - if (xfs_inode_clean(ip)) - goto out_unlock; - if (!xfs_iflock_nowait(ip)) { - if (!(flags & SYNC_WAIT)) - goto out_unlock; - xfs_iflock(ip); - } - - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - goto out_unlock; - } - - error = xfs_iflush(ip, flags); - - /* - * We don't want to try again on non-blocking flushes that can't run - * again immediately. If an inode really must be written, then that's - * what the SYNC_WAIT flag is for. - */ - if (error == EAGAIN) { - ASSERT(!(flags & SYNC_WAIT)); - error = 0; - } - - out_unlock: - xfs_iunlock(ip, XFS_ILOCK_SHARED); - return error; -} - -/* - * Write out pagecache data for the whole filesystem. - */ -STATIC int -xfs_sync_data( - struct xfs_mount *mp, - int flags) -{ - int error; - - ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); - - error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags); - if (error) - return XFS_ERROR(error); - - xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0); - return 0; -} - -/* - * Write out inode metadata (attributes) for the whole filesystem. - */ -STATIC int -xfs_sync_attr( - struct xfs_mount *mp, - int flags) -{ - ASSERT((flags & ~SYNC_WAIT) == 0); - - return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags); -} - -STATIC int -xfs_sync_fsdata( - struct xfs_mount *mp) -{ - struct xfs_buf *bp; - - /* - * If the buffer is pinned then push on the log so we won't get stuck - * waiting in the write for someone, maybe ourselves, to flush the log. - * - * Even though we just pushed the log above, we did not have the - * superblock buffer locked at that point so it can become pinned in - * between there and here. - */ - bp = xfs_getsb(mp, 0); - if (xfs_buf_ispinned(bp)) - xfs_log_force(mp, 0); - - return xfs_bwrite(mp, bp); -} - -/* - * When remounting a filesystem read-only or freezing the filesystem, we have - * two phases to execute. This first phase is syncing the data before we - * quiesce the filesystem, and the second is flushing all the inodes out after - * we've waited for all the transactions created by the first phase to - * complete. The second phase ensures that the inodes are written to their - * location on disk rather than just existing in transactions in the log. This - * means after a quiesce there is no log replay required to write the inodes to - * disk (this is the main difference between a sync and a quiesce). - */ -/* - * First stage of freeze - no writers will make progress now we are here, - * so we flush delwri and delalloc buffers here, then wait for all I/O to - * complete. Data is frozen at that point. Metadata is not frozen, - * transactions can still occur here so don't bother flushing the buftarg - * because it'll just get dirty again. - */ -int -xfs_quiesce_data( - struct xfs_mount *mp) -{ - int error, error2 = 0; - - xfs_qm_sync(mp, SYNC_TRYLOCK); - xfs_qm_sync(mp, SYNC_WAIT); - - /* force out the newly dirtied log buffers */ - xfs_log_force(mp, XFS_LOG_SYNC); - - /* write superblock and hoover up shutdown errors */ - error = xfs_sync_fsdata(mp); - - /* make sure all delwri buffers are written out */ - xfs_flush_buftarg(mp->m_ddev_targp, 1); - - /* mark the log as covered if needed */ - if (xfs_log_need_covered(mp)) - error2 = xfs_fs_log_dummy(mp); - - /* flush data-only devices */ - if (mp->m_rtdev_targp) - XFS_bflush(mp->m_rtdev_targp); - - return error ? error : error2; -} - -STATIC void -xfs_quiesce_fs( - struct xfs_mount *mp) -{ - int count = 0, pincount; - - xfs_reclaim_inodes(mp, 0); - xfs_flush_buftarg(mp->m_ddev_targp, 0); - - /* - * This loop must run at least twice. The first instance of the loop - * will flush most meta data but that will generate more meta data - * (typically directory updates). Which then must be flushed and - * logged before we can write the unmount record. We also so sync - * reclaim of inodes to catch any that the above delwri flush skipped. - */ - do { - xfs_reclaim_inodes(mp, SYNC_WAIT); - xfs_sync_attr(mp, SYNC_WAIT); - pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); - if (!pincount) { - delay(50); - count++; - } - } while (count < 2); -} - -/* - * Second stage of a quiesce. The data is already synced, now we have to take - * care of the metadata. New transactions are already blocked, so we need to - * wait for any remaining transactions to drain out before proceeding. - */ -void -xfs_quiesce_attr( - struct xfs_mount *mp) -{ - int error = 0; - - /* wait for all modifications to complete */ - while (atomic_read(&mp->m_active_trans) > 0) - delay(100); - - /* flush inodes and push all remaining buffers out to disk */ - xfs_quiesce_fs(mp); - - /* - * Just warn here till VFS can correctly support - * read-only remount without racing. - */ - WARN_ON(atomic_read(&mp->m_active_trans) != 0); - - /* Push the superblock and write an unmount record */ - error = xfs_log_sbcount(mp); - if (error) - xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " - "Frozen image may not be consistent."); - xfs_log_unmount_write(mp); - xfs_unmountfs_writesb(mp); -} - -static void -xfs_syncd_queue_sync( - struct xfs_mount *mp) -{ - queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work, - msecs_to_jiffies(xfs_syncd_centisecs * 10)); -} - -/* - * Every sync period we need to unpin all items, reclaim inodes and sync - * disk quotas. We might need to cover the log to indicate that the - * filesystem is idle and not frozen. - */ -STATIC void -xfs_sync_worker( - struct work_struct *work) -{ - struct xfs_mount *mp = container_of(to_delayed_work(work), - struct xfs_mount, m_sync_work); - int error; - - if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { - /* dgc: errors ignored here */ - if (mp->m_super->s_frozen == SB_UNFROZEN && - xfs_log_need_covered(mp)) - error = xfs_fs_log_dummy(mp); - else - xfs_log_force(mp, 0); - error = xfs_qm_sync(mp, SYNC_TRYLOCK); - - /* start pushing all the metadata that is currently dirty */ - xfs_ail_push_all(mp->m_ail); - } - - /* queue us up again */ - xfs_syncd_queue_sync(mp); -} - -/* - * Queue a new inode reclaim pass if there are reclaimable inodes and there - * isn't a reclaim pass already in progress. By default it runs every 5s based - * on the xfs syncd work default of 30s. Perhaps this should have it's own - * tunable, but that can be done if this method proves to be ineffective or too - * aggressive. - */ -static void -xfs_syncd_queue_reclaim( - struct xfs_mount *mp) -{ - - /* - * We can have inodes enter reclaim after we've shut down the syncd - * workqueue during unmount, so don't allow reclaim work to be queued - * during unmount. - */ - if (!(mp->m_super->s_flags & MS_ACTIVE)) - return; - - rcu_read_lock(); - if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { - queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work, - msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); - } - rcu_read_unlock(); -} - -/* - * This is a fast pass over the inode cache to try to get reclaim moving on as - * many inodes as possible in a short period of time. It kicks itself every few - * seconds, as well as being kicked by the inode cache shrinker when memory - * goes low. It scans as quickly as possible avoiding locked inodes or those - * already being flushed, and once done schedules a future pass. - */ -STATIC void -xfs_reclaim_worker( - struct work_struct *work) -{ - struct xfs_mount *mp = container_of(to_delayed_work(work), - struct xfs_mount, m_reclaim_work); - - xfs_reclaim_inodes(mp, SYNC_TRYLOCK); - xfs_syncd_queue_reclaim(mp); -} - -/* - * Flush delayed allocate data, attempting to free up reserved space - * from existing allocations. At this point a new allocation attempt - * has failed with ENOSPC and we are in the process of scratching our - * heads, looking about for more room. - * - * Queue a new data flush if there isn't one already in progress and - * wait for completion of the flush. This means that we only ever have one - * inode flush in progress no matter how many ENOSPC events are occurring and - * so will prevent the system from bogging down due to every concurrent - * ENOSPC event scanning all the active inodes in the system for writeback. - */ -void -xfs_flush_inodes( - struct xfs_inode *ip) -{ - struct xfs_mount *mp = ip->i_mount; - - queue_work(xfs_syncd_wq, &mp->m_flush_work); - flush_work_sync(&mp->m_flush_work); -} - -STATIC void -xfs_flush_worker( - struct work_struct *work) -{ - struct xfs_mount *mp = container_of(work, - struct xfs_mount, m_flush_work); - - xfs_sync_data(mp, SYNC_TRYLOCK); - xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); -} - -int -xfs_syncd_init( - struct xfs_mount *mp) -{ - INIT_WORK(&mp->m_flush_work, xfs_flush_worker); - INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker); - INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); - - xfs_syncd_queue_sync(mp); - xfs_syncd_queue_reclaim(mp); - - return 0; -} - -void -xfs_syncd_stop( - struct xfs_mount *mp) -{ - cancel_delayed_work_sync(&mp->m_sync_work); - cancel_delayed_work_sync(&mp->m_reclaim_work); - cancel_work_sync(&mp->m_flush_work); -} - -void -__xfs_inode_set_reclaim_tag( - struct xfs_perag *pag, - struct xfs_inode *ip) -{ - radix_tree_tag_set(&pag->pag_ici_root, - XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), - XFS_ICI_RECLAIM_TAG); - - if (!pag->pag_ici_reclaimable) { - /* propagate the reclaim tag up into the perag radix tree */ - spin_lock(&ip->i_mount->m_perag_lock); - radix_tree_tag_set(&ip->i_mount->m_perag_tree, - XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), - XFS_ICI_RECLAIM_TAG); - spin_unlock(&ip->i_mount->m_perag_lock); - - /* schedule periodic background inode reclaim */ - xfs_syncd_queue_reclaim(ip->i_mount); - - trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, - -1, _RET_IP_); - } - pag->pag_ici_reclaimable++; -} - -/* - * We set the inode flag atomically with the radix tree tag. - * Once we get tag lookups on the radix tree, this inode flag - * can go away. - */ -void -xfs_inode_set_reclaim_tag( - xfs_inode_t *ip) -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_perag *pag; - - pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - spin_lock(&pag->pag_ici_lock); - spin_lock(&ip->i_flags_lock); - __xfs_inode_set_reclaim_tag(pag, ip); - __xfs_iflags_set(ip, XFS_IRECLAIMABLE); - spin_unlock(&ip->i_flags_lock); - spin_unlock(&pag->pag_ici_lock); - xfs_perag_put(pag); -} - -STATIC void -__xfs_inode_clear_reclaim( - xfs_perag_t *pag, - xfs_inode_t *ip) -{ - pag->pag_ici_reclaimable--; - if (!pag->pag_ici_reclaimable) { - /* clear the reclaim tag from the perag radix tree */ - spin_lock(&ip->i_mount->m_perag_lock); - radix_tree_tag_clear(&ip->i_mount->m_perag_tree, - XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), - XFS_ICI_RECLAIM_TAG); - spin_unlock(&ip->i_mount->m_perag_lock); - trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno, - -1, _RET_IP_); - } -} - -void -__xfs_inode_clear_reclaim_tag( - xfs_mount_t *mp, - xfs_perag_t *pag, - xfs_inode_t *ip) -{ - radix_tree_tag_clear(&pag->pag_ici_root, - XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); - __xfs_inode_clear_reclaim(pag, ip); -} - -/* - * Grab the inode for reclaim exclusively. - * Return 0 if we grabbed it, non-zero otherwise. - */ -STATIC int -xfs_reclaim_inode_grab( - struct xfs_inode *ip, - int flags) -{ - ASSERT(rcu_read_lock_held()); - - /* quick check for stale RCU freed inode */ - if (!ip->i_ino) - return 1; - - /* - * do some unlocked checks first to avoid unnecessary lock traffic. - * The first is a flush lock check, the second is a already in reclaim - * check. Only do these checks if we are not going to block on locks. - */ - if ((flags & SYNC_TRYLOCK) && - (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) { - return 1; - } - - /* - * The radix tree lock here protects a thread in xfs_iget from racing - * with us starting reclaim on the inode. Once we have the - * XFS_IRECLAIM flag set it will not touch us. - * - * Due to RCU lookup, we may find inodes that have been freed and only - * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that - * aren't candidates for reclaim at all, so we must check the - * XFS_IRECLAIMABLE is set first before proceeding to reclaim. - */ - spin_lock(&ip->i_flags_lock); - if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) || - __xfs_iflags_test(ip, XFS_IRECLAIM)) { - /* not a reclaim candidate. */ - spin_unlock(&ip->i_flags_lock); - return 1; - } - __xfs_iflags_set(ip, XFS_IRECLAIM); - spin_unlock(&ip->i_flags_lock); - return 0; -} - -/* - * Inodes in different states need to be treated differently, and the return - * value of xfs_iflush is not sufficient to get this right. The following table - * lists the inode states and the reclaim actions necessary for non-blocking - * reclaim: - * - * - * inode state iflush ret required action - * --------------- ---------- --------------- - * bad - reclaim - * shutdown EIO unpin and reclaim - * clean, unpinned 0 reclaim - * stale, unpinned 0 reclaim - * clean, pinned(*) 0 requeue - * stale, pinned EAGAIN requeue - * dirty, delwri ok 0 requeue - * dirty, delwri blocked EAGAIN requeue - * dirty, sync flush 0 reclaim - * - * (*) dgc: I don't think the clean, pinned state is possible but it gets - * handled anyway given the order of checks implemented. - * - * As can be seen from the table, the return value of xfs_iflush() is not - * sufficient to correctly decide the reclaim action here. The checks in - * xfs_iflush() might look like duplicates, but they are not. - * - * Also, because we get the flush lock first, we know that any inode that has - * been flushed delwri has had the flush completed by the time we check that - * the inode is clean. The clean inode check needs to be done before flushing - * the inode delwri otherwise we would loop forever requeuing clean inodes as - * we cannot tell apart a successful delwri flush and a clean inode from the - * return value of xfs_iflush(). - * - * Note that because the inode is flushed delayed write by background - * writeback, the flush lock may already be held here and waiting on it can - * result in very long latencies. Hence for sync reclaims, where we wait on the - * flush lock, the caller should push out delayed write inodes first before - * trying to reclaim them to minimise the amount of time spent waiting. For - * background relaim, we just requeue the inode for the next pass. - * - * Hence the order of actions after gaining the locks should be: - * bad => reclaim - * shutdown => unpin and reclaim - * pinned, delwri => requeue - * pinned, sync => unpin - * stale => reclaim - * clean => reclaim - * dirty, delwri => flush and requeue - * dirty, sync => flush, wait and reclaim - */ -STATIC int -xfs_reclaim_inode( - struct xfs_inode *ip, - struct xfs_perag *pag, - int sync_mode) -{ - int error; - -restart: - error = 0; - xfs_ilock(ip, XFS_ILOCK_EXCL); - if (!xfs_iflock_nowait(ip)) { - if (!(sync_mode & SYNC_WAIT)) - goto out; - xfs_iflock(ip); - } - - if (is_bad_inode(VFS_I(ip))) - goto reclaim; - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { - xfs_iunpin_wait(ip); - goto reclaim; - } - if (xfs_ipincount(ip)) { - if (!(sync_mode & SYNC_WAIT)) { - xfs_ifunlock(ip); - goto out; - } - xfs_iunpin_wait(ip); - } - if (xfs_iflags_test(ip, XFS_ISTALE)) - goto reclaim; - if (xfs_inode_clean(ip)) - goto reclaim; - - /* - * Now we have an inode that needs flushing. - * - * We do a nonblocking flush here even if we are doing a SYNC_WAIT - * reclaim as we can deadlock with inode cluster removal. - * xfs_ifree_cluster() can lock the inode buffer before it locks the - * ip->i_lock, and we are doing the exact opposite here. As a result, - * doing a blocking xfs_itobp() to get the cluster buffer will result - * in an ABBA deadlock with xfs_ifree_cluster(). - * - * As xfs_ifree_cluser() must gather all inodes that are active in the - * cache to mark them stale, if we hit this case we don't actually want - * to do IO here - we want the inode marked stale so we can simply - * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush, - * just unlock the inode, back off and try again. Hopefully the next - * pass through will see the stale flag set on the inode. - */ - error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode); - if (sync_mode & SYNC_WAIT) { - if (error == EAGAIN) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - /* backoff longer than in xfs_ifree_cluster */ - delay(2); - goto restart; - } - xfs_iflock(ip); - goto reclaim; - } - - /* - * When we have to flush an inode but don't have SYNC_WAIT set, we - * flush the inode out using a delwri buffer and wait for the next - * call into reclaim to find it in a clean state instead of waiting for - * it now. We also don't return errors here - if the error is transient - * then the next reclaim pass will flush the inode, and if the error - * is permanent then the next sync reclaim will reclaim the inode and - * pass on the error. - */ - if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { - xfs_warn(ip->i_mount, - "inode 0x%llx background reclaim flush failed with %d", - (long long)ip->i_ino, error); - } -out: - xfs_iflags_clear(ip, XFS_IRECLAIM); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - /* - * We could return EAGAIN here to make reclaim rescan the inode tree in - * a short while. However, this just burns CPU time scanning the tree - * waiting for IO to complete and xfssyncd never goes back to the idle - * state. Instead, return 0 to let the next scheduled background reclaim - * attempt to reclaim the inode again. - */ - return 0; - -reclaim: - xfs_ifunlock(ip); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - XFS_STATS_INC(xs_ig_reclaims); - /* - * Remove the inode from the per-AG radix tree. - * - * Because radix_tree_delete won't complain even if the item was never - * added to the tree assert that it's been there before to catch - * problems with the inode life time early on. - */ - spin_lock(&pag->pag_ici_lock); - if (!radix_tree_delete(&pag->pag_ici_root, - XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) - ASSERT(0); - __xfs_inode_clear_reclaim(pag, ip); - spin_unlock(&pag->pag_ici_lock); - - /* - * Here we do an (almost) spurious inode lock in order to coordinate - * with inode cache radix tree lookups. This is because the lookup - * can reference the inodes in the cache without taking references. - * - * We make that OK here by ensuring that we wait until the inode is - * unlocked after the lookup before we go ahead and free it. We get - * both the ilock and the iolock because the code may need to drop the - * ilock one but will still hold the iolock. - */ - xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_qm_dqdetach(ip); - xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - - xfs_inode_free(ip); - return error; - -} - -/* - * Walk the AGs and reclaim the inodes in them. Even if the filesystem is - * corrupted, we still want to try to reclaim all the inodes. If we don't, - * then a shut down during filesystem unmount reclaim walk leak all the - * unreclaimed inodes. - */ -int -xfs_reclaim_inodes_ag( - struct xfs_mount *mp, - int flags, - int *nr_to_scan) -{ - struct xfs_perag *pag; - int error = 0; - int last_error = 0; - xfs_agnumber_t ag; - int trylock = flags & SYNC_TRYLOCK; - int skipped; - -restart: - ag = 0; - skipped = 0; - while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { - unsigned long first_index = 0; - int done = 0; - int nr_found = 0; - - ag = pag->pag_agno + 1; - - if (trylock) { - if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) { - skipped++; - xfs_perag_put(pag); - continue; - } - first_index = pag->pag_ici_reclaim_cursor; - } else - mutex_lock(&pag->pag_ici_reclaim_lock); - - do { - struct xfs_inode *batch[XFS_LOOKUP_BATCH]; - int i; - - rcu_read_lock(); - nr_found = radix_tree_gang_lookup_tag( - &pag->pag_ici_root, - (void **)batch, first_index, - XFS_LOOKUP_BATCH, - XFS_ICI_RECLAIM_TAG); - if (!nr_found) { - done = 1; - rcu_read_unlock(); - break; - } - - /* - * Grab the inodes before we drop the lock. if we found - * nothing, nr == 0 and the loop will be skipped. - */ - for (i = 0; i < nr_found; i++) { - struct xfs_inode *ip = batch[i]; - - if (done || xfs_reclaim_inode_grab(ip, flags)) - batch[i] = NULL; - - /* - * Update the index for the next lookup. Catch - * overflows into the next AG range which can - * occur if we have inodes in the last block of - * the AG and we are currently pointing to the - * last inode. - * - * Because we may see inodes that are from the - * wrong AG due to RCU freeing and - * reallocation, only update the index if it - * lies in this AG. It was a race that lead us - * to see this inode, so another lookup from - * the same index will not find it again. - */ - if (XFS_INO_TO_AGNO(mp, ip->i_ino) != - pag->pag_agno) - continue; - first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); - if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) - done = 1; - } - - /* unlock now we've grabbed the inodes. */ - rcu_read_unlock(); - - for (i = 0; i < nr_found; i++) { - if (!batch[i]) - continue; - error = xfs_reclaim_inode(batch[i], pag, flags); - if (error && last_error != EFSCORRUPTED) - last_error = error; - } - - *nr_to_scan -= XFS_LOOKUP_BATCH; - - cond_resched(); - - } while (nr_found && !done && *nr_to_scan > 0); - - if (trylock && !done) - pag->pag_ici_reclaim_cursor = first_index; - else - pag->pag_ici_reclaim_cursor = 0; - mutex_unlock(&pag->pag_ici_reclaim_lock); - xfs_perag_put(pag); - } - - /* - * if we skipped any AG, and we still have scan count remaining, do - * another pass this time using blocking reclaim semantics (i.e - * waiting on the reclaim locks and ignoring the reclaim cursors). This - * ensure that when we get more reclaimers than AGs we block rather - * than spin trying to execute reclaim. - */ - if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) { - trylock = 0; - goto restart; - } - return XFS_ERROR(last_error); -} - -int -xfs_reclaim_inodes( - xfs_mount_t *mp, - int mode) -{ - int nr_to_scan = INT_MAX; - - return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan); -} - -/* - * Scan a certain number of inodes for reclaim. - * - * When called we make sure that there is a background (fast) inode reclaim in - * progress, while we will throttle the speed of reclaim via doing synchronous - * reclaim of inodes. That means if we come across dirty inodes, we wait for - * them to be cleaned, which we hope will not be very long due to the - * background walker having already kicked the IO off on those dirty inodes. - */ -void -xfs_reclaim_inodes_nr( - struct xfs_mount *mp, - int nr_to_scan) -{ - /* kick background reclaimer and push the AIL */ - xfs_syncd_queue_reclaim(mp); - xfs_ail_push_all(mp->m_ail); - - xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); -} - -/* - * Return the number of reclaimable inodes in the filesystem for - * the shrinker to determine how much to reclaim. - */ -int -xfs_reclaim_inodes_count( - struct xfs_mount *mp) -{ - struct xfs_perag *pag; - xfs_agnumber_t ag = 0; - int reclaimable = 0; - - while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { - ag = pag->pag_agno + 1; - reclaimable += pag->pag_ici_reclaimable; - xfs_perag_put(pag); - } - return reclaimable; -} - diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h deleted file mode 100644 index 941202e..0000000 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef XFS_SYNC_H -#define XFS_SYNC_H 1 - -struct xfs_mount; -struct xfs_perag; - -#define SYNC_WAIT 0x0001 /* wait for i/o to complete */ -#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ - -extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ - -int xfs_syncd_init(struct xfs_mount *mp); -void xfs_syncd_stop(struct xfs_mount *mp); - -int xfs_quiesce_data(struct xfs_mount *mp); -void xfs_quiesce_attr(struct xfs_mount *mp); - -void xfs_flush_inodes(struct xfs_inode *ip); - -int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); -int xfs_reclaim_inodes_count(struct xfs_mount *mp); -void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); - -void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); -void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); -void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, - struct xfs_inode *ip); - -int xfs_sync_inode_grab(struct xfs_inode *ip); -int xfs_inode_ag_iterator(struct xfs_mount *mp, - int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), - int flags); - -#endif diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c deleted file mode 100644 index ee2d2ad..0000000 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ /dev/null @@ -1,252 +0,0 @@ -/* - * Copyright (c) 2001-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include -#include -#include "xfs_error.h" - -static struct ctl_table_header *xfs_table_header; - -#ifdef CONFIG_PROC_FS -STATIC int -xfs_stats_clear_proc_handler( - ctl_table *ctl, - int write, - void __user *buffer, - size_t *lenp, - loff_t *ppos) -{ - int c, ret, *valp = ctl->data; - __uint32_t vn_active; - - ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); - - if (!ret && write && *valp) { - xfs_notice(NULL, "Clearing xfsstats"); - for_each_possible_cpu(c) { - preempt_disable(); - /* save vn_active, it's a universal truth! */ - vn_active = per_cpu(xfsstats, c).vn_active; - memset(&per_cpu(xfsstats, c), 0, - sizeof(struct xfsstats)); - per_cpu(xfsstats, c).vn_active = vn_active; - preempt_enable(); - } - xfs_stats_clear = 0; - } - - return ret; -} - -STATIC int -xfs_panic_mask_proc_handler( - ctl_table *ctl, - int write, - void __user *buffer, - size_t *lenp, - loff_t *ppos) -{ - int ret, *valp = ctl->data; - - ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); - if (!ret && write) { - xfs_panic_mask = *valp; -#ifdef DEBUG - xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES); -#endif - } - return ret; -} -#endif /* CONFIG_PROC_FS */ - -static ctl_table xfs_table[] = { - { - .procname = "irix_sgid_inherit", - .data = &xfs_params.sgid_inherit.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.sgid_inherit.min, - .extra2 = &xfs_params.sgid_inherit.max - }, - { - .procname = "irix_symlink_mode", - .data = &xfs_params.symlink_mode.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.symlink_mode.min, - .extra2 = &xfs_params.symlink_mode.max - }, - { - .procname = "panic_mask", - .data = &xfs_params.panic_mask.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = xfs_panic_mask_proc_handler, - .extra1 = &xfs_params.panic_mask.min, - .extra2 = &xfs_params.panic_mask.max - }, - - { - .procname = "error_level", - .data = &xfs_params.error_level.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.error_level.min, - .extra2 = &xfs_params.error_level.max - }, - { - .procname = "xfssyncd_centisecs", - .data = &xfs_params.syncd_timer.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.syncd_timer.min, - .extra2 = &xfs_params.syncd_timer.max - }, - { - .procname = "inherit_sync", - .data = &xfs_params.inherit_sync.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.inherit_sync.min, - .extra2 = &xfs_params.inherit_sync.max - }, - { - .procname = "inherit_nodump", - .data = &xfs_params.inherit_nodump.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.inherit_nodump.min, - .extra2 = &xfs_params.inherit_nodump.max - }, - { - .procname = "inherit_noatime", - .data = &xfs_params.inherit_noatim.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.inherit_noatim.min, - .extra2 = &xfs_params.inherit_noatim.max - }, - { - .procname = "xfsbufd_centisecs", - .data = &xfs_params.xfs_buf_timer.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.xfs_buf_timer.min, - .extra2 = &xfs_params.xfs_buf_timer.max - }, - { - .procname = "age_buffer_centisecs", - .data = &xfs_params.xfs_buf_age.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.xfs_buf_age.min, - .extra2 = &xfs_params.xfs_buf_age.max - }, - { - .procname = "inherit_nosymlinks", - .data = &xfs_params.inherit_nosym.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.inherit_nosym.min, - .extra2 = &xfs_params.inherit_nosym.max - }, - { - .procname = "rotorstep", - .data = &xfs_params.rotorstep.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.rotorstep.min, - .extra2 = &xfs_params.rotorstep.max - }, - { - .procname = "inherit_nodefrag", - .data = &xfs_params.inherit_nodfrg.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.inherit_nodfrg.min, - .extra2 = &xfs_params.inherit_nodfrg.max - }, - { - .procname = "filestream_centisecs", - .data = &xfs_params.fstrm_timer.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.fstrm_timer.min, - .extra2 = &xfs_params.fstrm_timer.max, - }, - /* please keep this the last entry */ -#ifdef CONFIG_PROC_FS - { - .procname = "stats_clear", - .data = &xfs_params.stats_clear.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = xfs_stats_clear_proc_handler, - .extra1 = &xfs_params.stats_clear.min, - .extra2 = &xfs_params.stats_clear.max - }, -#endif /* CONFIG_PROC_FS */ - - {} -}; - -static ctl_table xfs_dir_table[] = { - { - .procname = "xfs", - .mode = 0555, - .child = xfs_table - }, - {} -}; - -static ctl_table xfs_root_table[] = { - { - .procname = "fs", - .mode = 0555, - .child = xfs_dir_table - }, - {} -}; - -int -xfs_sysctl_register(void) -{ - xfs_table_header = register_sysctl_table(xfs_root_table); - if (!xfs_table_header) - return -ENOMEM; - return 0; -} - -void -xfs_sysctl_unregister(void) -{ - unregister_sysctl_table(xfs_table_header); -} diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h deleted file mode 100644 index b9937d4..0000000 --- a/fs/xfs/linux-2.6/xfs_sysctl.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2001-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SYSCTL_H__ -#define __XFS_SYSCTL_H__ - -#include - -/* - * Tunable xfs parameters - */ - -typedef struct xfs_sysctl_val { - int min; - int val; - int max; -} xfs_sysctl_val_t; - -typedef struct xfs_param { - xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is - * not a member of parent dir GID. */ - xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */ - xfs_sysctl_val_t panic_mask; /* bitmask to cause panic on errors. */ - xfs_sysctl_val_t error_level; /* Degree of reporting for problems */ - xfs_sysctl_val_t syncd_timer; /* Interval between xfssyncd wakeups */ - xfs_sysctl_val_t stats_clear; /* Reset all XFS statistics to zero. */ - xfs_sysctl_val_t inherit_sync; /* Inherit the "sync" inode flag. */ - xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */ - xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */ - xfs_sysctl_val_t xfs_buf_timer; /* Interval between xfsbufd wakeups. */ - xfs_sysctl_val_t xfs_buf_age; /* Metadata buffer age before flush. */ - xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */ - xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */ - xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */ - xfs_sysctl_val_t fstrm_timer; /* Filestream dir-AG assoc'n timeout. */ -} xfs_param_t; - -/* - * xfs_error_level: - * - * How much error reporting will be done when internal problems are - * encountered. These problems normally return an EFSCORRUPTED to their - * caller, with no other information reported. - * - * 0 No error reports - * 1 Report EFSCORRUPTED errors that will cause a filesystem shutdown - * 5 Report all EFSCORRUPTED errors (all of the above errors, plus any - * additional errors that are known to not cause shutdowns) - * - * xfs_panic_mask bit 0x8 turns the error reports into panics - */ - -enum { - /* XFS_REFCACHE_SIZE = 1 */ - /* XFS_REFCACHE_PURGE = 2 */ - /* XFS_RESTRICT_CHOWN = 3 */ - XFS_SGID_INHERIT = 4, - XFS_SYMLINK_MODE = 5, - XFS_PANIC_MASK = 6, - XFS_ERRLEVEL = 7, - XFS_SYNCD_TIMER = 8, - /* XFS_PROBE_DMAPI = 9 */ - /* XFS_PROBE_IOOPS = 10 */ - /* XFS_PROBE_QUOTA = 11 */ - XFS_STATS_CLEAR = 12, - XFS_INHERIT_SYNC = 13, - XFS_INHERIT_NODUMP = 14, - XFS_INHERIT_NOATIME = 15, - XFS_BUF_TIMER = 16, - XFS_BUF_AGE = 17, - /* XFS_IO_BYPASS = 18 */ - XFS_INHERIT_NOSYM = 19, - XFS_ROTORSTEP = 20, - XFS_INHERIT_NODFRG = 21, - XFS_FILESTREAM_TIMER = 22, -}; - -extern xfs_param_t xfs_params; - -#ifdef CONFIG_SYSCTL -extern int xfs_sysctl_register(void); -extern void xfs_sysctl_unregister(void); -#else -# define xfs_sysctl_register() (0) -# define xfs_sysctl_unregister() do { } while (0) -#endif /* CONFIG_SYSCTL */ - -#endif /* __XFS_SYSCTL_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c deleted file mode 100644 index 9010ce8..0000000 --- a/fs/xfs/linux-2.6/xfs_trace.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2009, Christoph Hellwig - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_btree.h" -#include "xfs_mount.h" -#include "xfs_ialloc.h" -#include "xfs_itable.h" -#include "xfs_alloc.h" -#include "xfs_bmap.h" -#include "xfs_attr.h" -#include "xfs_attr_leaf.h" -#include "xfs_log_priv.h" -#include "xfs_buf_item.h" -#include "xfs_quota.h" -#include "xfs_iomap.h" -#include "xfs_aops.h" -#include "xfs_dquot_item.h" -#include "xfs_dquot.h" -#include "xfs_log_recover.h" -#include "xfs_inode_item.h" - -/* - * We include this last to have the helpers above available for the trace - * event implementations. - */ -#define CREATE_TRACE_POINTS -#include "xfs_trace.h" diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h deleted file mode 100644 index 690fc7a..0000000 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ /dev/null @@ -1,1746 +0,0 @@ -/* - * Copyright (c) 2009, Christoph Hellwig - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM xfs - -#if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_XFS_H - -#include - -struct xfs_agf; -struct xfs_alloc_arg; -struct xfs_attr_list_context; -struct xfs_buf_log_item; -struct xfs_da_args; -struct xfs_da_node_entry; -struct xfs_dquot; -struct xlog_ticket; -struct log; -struct xlog_recover; -struct xlog_recover_item; -struct xfs_buf_log_format; -struct xfs_inode_log_format; - -DECLARE_EVENT_CLASS(xfs_attr_list_class, - TP_PROTO(struct xfs_attr_list_context *ctx), - TP_ARGS(ctx), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(u32, hashval) - __field(u32, blkno) - __field(u32, offset) - __field(void *, alist) - __field(int, bufsize) - __field(int, count) - __field(int, firstu) - __field(int, dupcnt) - __field(int, flags) - ), - TP_fast_assign( - __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; - __entry->ino = ctx->dp->i_ino; - __entry->hashval = ctx->cursor->hashval; - __entry->blkno = ctx->cursor->blkno; - __entry->offset = ctx->cursor->offset; - __entry->alist = ctx->alist; - __entry->bufsize = ctx->bufsize; - __entry->count = ctx->count; - __entry->firstu = ctx->firstu; - __entry->flags = ctx->flags; - ), - TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " - "alist 0x%p size %u count %u firstu %u flags %d %s", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->hashval, - __entry->blkno, - __entry->offset, - __entry->dupcnt, - __entry->alist, - __entry->bufsize, - __entry->count, - __entry->firstu, - __entry->flags, - __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS) - ) -) - -#define DEFINE_ATTR_LIST_EVENT(name) \ -DEFINE_EVENT(xfs_attr_list_class, name, \ - TP_PROTO(struct xfs_attr_list_context *ctx), \ - TP_ARGS(ctx)) -DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf); -DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all); -DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf); -DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf_end); -DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full); -DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add); -DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk); -DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound); - -DECLARE_EVENT_CLASS(xfs_perag_class, - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, - unsigned long caller_ip), - TP_ARGS(mp, agno, refcount, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_agnumber_t, agno) - __field(int, refcount) - __field(unsigned long, caller_ip) - ), - TP_fast_assign( - __entry->dev = mp->m_super->s_dev; - __entry->agno = agno; - __entry->refcount = refcount; - __entry->caller_ip = caller_ip; - ), - TP_printk("dev %d:%d agno %u refcount %d caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->agno, - __entry->refcount, - (char *)__entry->caller_ip) -); - -#define DEFINE_PERAG_REF_EVENT(name) \ -DEFINE_EVENT(xfs_perag_class, name, \ - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \ - unsigned long caller_ip), \ - TP_ARGS(mp, agno, refcount, caller_ip)) -DEFINE_PERAG_REF_EVENT(xfs_perag_get); -DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag); -DEFINE_PERAG_REF_EVENT(xfs_perag_put); -DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); -DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); - -TRACE_EVENT(xfs_attr_list_node_descend, - TP_PROTO(struct xfs_attr_list_context *ctx, - struct xfs_da_node_entry *btree), - TP_ARGS(ctx, btree), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(u32, hashval) - __field(u32, blkno) - __field(u32, offset) - __field(void *, alist) - __field(int, bufsize) - __field(int, count) - __field(int, firstu) - __field(int, dupcnt) - __field(int, flags) - __field(u32, bt_hashval) - __field(u32, bt_before) - ), - TP_fast_assign( - __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; - __entry->ino = ctx->dp->i_ino; - __entry->hashval = ctx->cursor->hashval; - __entry->blkno = ctx->cursor->blkno; - __entry->offset = ctx->cursor->offset; - __entry->alist = ctx->alist; - __entry->bufsize = ctx->bufsize; - __entry->count = ctx->count; - __entry->firstu = ctx->firstu; - __entry->flags = ctx->flags; - __entry->bt_hashval = be32_to_cpu(btree->hashval); - __entry->bt_before = be32_to_cpu(btree->before); - ), - TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " - "alist 0x%p size %u count %u firstu %u flags %d %s " - "node hashval %u, node before %u", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->hashval, - __entry->blkno, - __entry->offset, - __entry->dupcnt, - __entry->alist, - __entry->bufsize, - __entry->count, - __entry->firstu, - __entry->flags, - __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS), - __entry->bt_hashval, - __entry->bt_before) -); - -TRACE_EVENT(xfs_iext_insert, - TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, - struct xfs_bmbt_irec *r, int state, unsigned long caller_ip), - TP_ARGS(ip, idx, r, state, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(xfs_extnum_t, idx) - __field(xfs_fileoff_t, startoff) - __field(xfs_fsblock_t, startblock) - __field(xfs_filblks_t, blockcount) - __field(xfs_exntst_t, state) - __field(int, bmap_state) - __field(unsigned long, caller_ip) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->idx = idx; - __entry->startoff = r->br_startoff; - __entry->startblock = r->br_startblock; - __entry->blockcount = r->br_blockcount; - __entry->state = r->br_state; - __entry->bmap_state = state; - __entry->caller_ip = caller_ip; - ), - TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " - "offset %lld block %lld count %lld flag %d caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), - (long)__entry->idx, - __entry->startoff, - (__int64_t)__entry->startblock, - __entry->blockcount, - __entry->state, - (char *)__entry->caller_ip) -); - -DECLARE_EVENT_CLASS(xfs_bmap_class, - TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, - unsigned long caller_ip), - TP_ARGS(ip, idx, state, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(xfs_extnum_t, idx) - __field(xfs_fileoff_t, startoff) - __field(xfs_fsblock_t, startblock) - __field(xfs_filblks_t, blockcount) - __field(xfs_exntst_t, state) - __field(int, bmap_state) - __field(unsigned long, caller_ip) - ), - TP_fast_assign( - struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ? - ip->i_afp : &ip->i_df; - struct xfs_bmbt_irec r; - - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r); - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->idx = idx; - __entry->startoff = r.br_startoff; - __entry->startblock = r.br_startblock; - __entry->blockcount = r.br_blockcount; - __entry->state = r.br_state; - __entry->bmap_state = state; - __entry->caller_ip = caller_ip; - ), - TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " - "offset %lld block %lld count %lld flag %d caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), - (long)__entry->idx, - __entry->startoff, - (__int64_t)__entry->startblock, - __entry->blockcount, - __entry->state, - (char *)__entry->caller_ip) -) - -#define DEFINE_BMAP_EVENT(name) \ -DEFINE_EVENT(xfs_bmap_class, name, \ - TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \ - unsigned long caller_ip), \ - TP_ARGS(ip, idx, state, caller_ip)) -DEFINE_BMAP_EVENT(xfs_iext_remove); -DEFINE_BMAP_EVENT(xfs_bmap_pre_update); -DEFINE_BMAP_EVENT(xfs_bmap_post_update); -DEFINE_BMAP_EVENT(xfs_extlist); - -DECLARE_EVENT_CLASS(xfs_buf_class, - TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), - TP_ARGS(bp, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_daddr_t, bno) - __field(size_t, buffer_length) - __field(int, hold) - __field(int, pincount) - __field(unsigned, lockval) - __field(unsigned, flags) - __field(unsigned long, caller_ip) - ), - TP_fast_assign( - __entry->dev = bp->b_target->bt_dev; - __entry->bno = bp->b_bn; - __entry->buffer_length = bp->b_buffer_length; - __entry->hold = atomic_read(&bp->b_hold); - __entry->pincount = atomic_read(&bp->b_pin_count); - __entry->lockval = bp->b_sema.count; - __entry->flags = bp->b_flags; - __entry->caller_ip = caller_ip; - ), - TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " - "lock %d flags %s caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long long)__entry->bno, - __entry->buffer_length, - __entry->hold, - __entry->pincount, - __entry->lockval, - __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), - (void *)__entry->caller_ip) -) - -#define DEFINE_BUF_EVENT(name) \ -DEFINE_EVENT(xfs_buf_class, name, \ - TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \ - TP_ARGS(bp, caller_ip)) -DEFINE_BUF_EVENT(xfs_buf_init); -DEFINE_BUF_EVENT(xfs_buf_free); -DEFINE_BUF_EVENT(xfs_buf_hold); -DEFINE_BUF_EVENT(xfs_buf_rele); -DEFINE_BUF_EVENT(xfs_buf_iodone); -DEFINE_BUF_EVENT(xfs_buf_iorequest); -DEFINE_BUF_EVENT(xfs_buf_bawrite); -DEFINE_BUF_EVENT(xfs_buf_bdwrite); -DEFINE_BUF_EVENT(xfs_buf_lock); -DEFINE_BUF_EVENT(xfs_buf_lock_done); -DEFINE_BUF_EVENT(xfs_buf_trylock); -DEFINE_BUF_EVENT(xfs_buf_unlock); -DEFINE_BUF_EVENT(xfs_buf_iowait); -DEFINE_BUF_EVENT(xfs_buf_iowait_done); -DEFINE_BUF_EVENT(xfs_buf_delwri_queue); -DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue); -DEFINE_BUF_EVENT(xfs_buf_delwri_split); -DEFINE_BUF_EVENT(xfs_buf_get_uncached); -DEFINE_BUF_EVENT(xfs_bdstrat_shut); -DEFINE_BUF_EVENT(xfs_buf_item_relse); -DEFINE_BUF_EVENT(xfs_buf_item_iodone); -DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); -DEFINE_BUF_EVENT(xfs_buf_error_relse); -DEFINE_BUF_EVENT(xfs_trans_read_buf_io); -DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); - -/* not really buffer traces, but the buf provides useful information */ -DEFINE_BUF_EVENT(xfs_btree_corrupt); -DEFINE_BUF_EVENT(xfs_da_btree_corrupt); -DEFINE_BUF_EVENT(xfs_reset_dqcounts); -DEFINE_BUF_EVENT(xfs_inode_item_push); - -/* pass flags explicitly */ -DECLARE_EVENT_CLASS(xfs_buf_flags_class, - TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), - TP_ARGS(bp, flags, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_daddr_t, bno) - __field(size_t, buffer_length) - __field(int, hold) - __field(int, pincount) - __field(unsigned, lockval) - __field(unsigned, flags) - __field(unsigned long, caller_ip) - ), - TP_fast_assign( - __entry->dev = bp->b_target->bt_dev; - __entry->bno = bp->b_bn; - __entry->buffer_length = bp->b_buffer_length; - __entry->flags = flags; - __entry->hold = atomic_read(&bp->b_hold); - __entry->pincount = atomic_read(&bp->b_pin_count); - __entry->lockval = bp->b_sema.count; - __entry->caller_ip = caller_ip; - ), - TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " - "lock %d flags %s caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long long)__entry->bno, - __entry->buffer_length, - __entry->hold, - __entry->pincount, - __entry->lockval, - __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), - (void *)__entry->caller_ip) -) - -#define DEFINE_BUF_FLAGS_EVENT(name) \ -DEFINE_EVENT(xfs_buf_flags_class, name, \ - TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \ - TP_ARGS(bp, flags, caller_ip)) -DEFINE_BUF_FLAGS_EVENT(xfs_buf_find); -DEFINE_BUF_FLAGS_EVENT(xfs_buf_get); -DEFINE_BUF_FLAGS_EVENT(xfs_buf_read); - -TRACE_EVENT(xfs_buf_ioerror, - TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip), - TP_ARGS(bp, error, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_daddr_t, bno) - __field(size_t, buffer_length) - __field(unsigned, flags) - __field(int, hold) - __field(int, pincount) - __field(unsigned, lockval) - __field(int, error) - __field(unsigned long, caller_ip) - ), - TP_fast_assign( - __entry->dev = bp->b_target->bt_dev; - __entry->bno = bp->b_bn; - __entry->buffer_length = bp->b_buffer_length; - __entry->hold = atomic_read(&bp->b_hold); - __entry->pincount = atomic_read(&bp->b_pin_count); - __entry->lockval = bp->b_sema.count; - __entry->error = error; - __entry->flags = bp->b_flags; - __entry->caller_ip = caller_ip; - ), - TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " - "lock %d error %d flags %s caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long long)__entry->bno, - __entry->buffer_length, - __entry->hold, - __entry->pincount, - __entry->lockval, - __entry->error, - __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), - (void *)__entry->caller_ip) -); - -DECLARE_EVENT_CLASS(xfs_buf_item_class, - TP_PROTO(struct xfs_buf_log_item *bip), - TP_ARGS(bip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_daddr_t, buf_bno) - __field(size_t, buf_len) - __field(int, buf_hold) - __field(int, buf_pincount) - __field(int, buf_lockval) - __field(unsigned, buf_flags) - __field(unsigned, bli_recur) - __field(int, bli_refcount) - __field(unsigned, bli_flags) - __field(void *, li_desc) - __field(unsigned, li_flags) - ), - TP_fast_assign( - __entry->dev = bip->bli_buf->b_target->bt_dev; - __entry->bli_flags = bip->bli_flags; - __entry->bli_recur = bip->bli_recur; - __entry->bli_refcount = atomic_read(&bip->bli_refcount); - __entry->buf_bno = bip->bli_buf->b_bn; - __entry->buf_len = bip->bli_buf->b_buffer_length; - __entry->buf_flags = bip->bli_buf->b_flags; - __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); - __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count); - __entry->buf_lockval = bip->bli_buf->b_sema.count; - __entry->li_desc = bip->bli_item.li_desc; - __entry->li_flags = bip->bli_item.li_flags; - ), - TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " - "lock %d flags %s recur %d refcount %d bliflags %s " - "lidesc 0x%p liflags %s", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long long)__entry->buf_bno, - __entry->buf_len, - __entry->buf_hold, - __entry->buf_pincount, - __entry->buf_lockval, - __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS), - __entry->bli_recur, - __entry->bli_refcount, - __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS), - __entry->li_desc, - __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS)) -) - -#define DEFINE_BUF_ITEM_EVENT(name) \ -DEFINE_EVENT(xfs_buf_item_class, name, \ - TP_PROTO(struct xfs_buf_log_item *bip), \ - TP_ARGS(bip)) -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf); -DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); -DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur); -DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb); -DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur); -DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf); -DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur); -DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf); -DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse); -DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin); -DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold); -DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); -DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); - -DECLARE_EVENT_CLASS(xfs_lock_class, - TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, - unsigned long caller_ip), - TP_ARGS(ip, lock_flags, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(int, lock_flags) - __field(unsigned long, caller_ip) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->lock_flags = lock_flags; - __entry->caller_ip = caller_ip; - ), - TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), - (void *)__entry->caller_ip) -) - -#define DEFINE_LOCK_EVENT(name) \ -DEFINE_EVENT(xfs_lock_class, name, \ - TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \ - unsigned long caller_ip), \ - TP_ARGS(ip, lock_flags, caller_ip)) -DEFINE_LOCK_EVENT(xfs_ilock); -DEFINE_LOCK_EVENT(xfs_ilock_nowait); -DEFINE_LOCK_EVENT(xfs_ilock_demote); -DEFINE_LOCK_EVENT(xfs_iunlock); - -DECLARE_EVENT_CLASS(xfs_inode_class, - TP_PROTO(struct xfs_inode *ip), - TP_ARGS(ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - ), - TP_printk("dev %d:%d ino 0x%llx", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino) -) - -#define DEFINE_INODE_EVENT(name) \ -DEFINE_EVENT(xfs_inode_class, name, \ - TP_PROTO(struct xfs_inode *ip), \ - TP_ARGS(ip)) -DEFINE_INODE_EVENT(xfs_iget_skip); -DEFINE_INODE_EVENT(xfs_iget_reclaim); -DEFINE_INODE_EVENT(xfs_iget_reclaim_fail); -DEFINE_INODE_EVENT(xfs_iget_hit); -DEFINE_INODE_EVENT(xfs_iget_miss); - -DEFINE_INODE_EVENT(xfs_getattr); -DEFINE_INODE_EVENT(xfs_setattr); -DEFINE_INODE_EVENT(xfs_readlink); -DEFINE_INODE_EVENT(xfs_alloc_file_space); -DEFINE_INODE_EVENT(xfs_free_file_space); -DEFINE_INODE_EVENT(xfs_readdir); -#ifdef CONFIG_XFS_POSIX_ACL -DEFINE_INODE_EVENT(xfs_get_acl); -#endif -DEFINE_INODE_EVENT(xfs_vm_bmap); -DEFINE_INODE_EVENT(xfs_file_ioctl); -DEFINE_INODE_EVENT(xfs_file_compat_ioctl); -DEFINE_INODE_EVENT(xfs_ioctl_setattr); -DEFINE_INODE_EVENT(xfs_file_fsync); -DEFINE_INODE_EVENT(xfs_destroy_inode); -DEFINE_INODE_EVENT(xfs_write_inode); -DEFINE_INODE_EVENT(xfs_evict_inode); - -DEFINE_INODE_EVENT(xfs_dquot_dqalloc); -DEFINE_INODE_EVENT(xfs_dquot_dqdetach); - -DECLARE_EVENT_CLASS(xfs_iref_class, - TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), - TP_ARGS(ip, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(int, count) - __field(int, pincount) - __field(unsigned long, caller_ip) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->count = atomic_read(&VFS_I(ip)->i_count); - __entry->pincount = atomic_read(&ip->i_pincount); - __entry->caller_ip = caller_ip; - ), - TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->count, - __entry->pincount, - (char *)__entry->caller_ip) -) - -#define DEFINE_IREF_EVENT(name) \ -DEFINE_EVENT(xfs_iref_class, name, \ - TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ - TP_ARGS(ip, caller_ip)) -DEFINE_IREF_EVENT(xfs_ihold); -DEFINE_IREF_EVENT(xfs_irele); -DEFINE_IREF_EVENT(xfs_inode_pin); -DEFINE_IREF_EVENT(xfs_inode_unpin); -DEFINE_IREF_EVENT(xfs_inode_unpin_nowait); - -DECLARE_EVENT_CLASS(xfs_namespace_class, - TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), - TP_ARGS(dp, name), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, dp_ino) - __dynamic_array(char, name, name->len) - ), - TP_fast_assign( - __entry->dev = VFS_I(dp)->i_sb->s_dev; - __entry->dp_ino = dp->i_ino; - memcpy(__get_str(name), name->name, name->len); - ), - TP_printk("dev %d:%d dp ino 0x%llx name %s", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->dp_ino, - __get_str(name)) -) - -#define DEFINE_NAMESPACE_EVENT(name) \ -DEFINE_EVENT(xfs_namespace_class, name, \ - TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \ - TP_ARGS(dp, name)) -DEFINE_NAMESPACE_EVENT(xfs_remove); -DEFINE_NAMESPACE_EVENT(xfs_link); -DEFINE_NAMESPACE_EVENT(xfs_lookup); -DEFINE_NAMESPACE_EVENT(xfs_create); -DEFINE_NAMESPACE_EVENT(xfs_symlink); - -TRACE_EVENT(xfs_rename, - TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp, - struct xfs_name *src_name, struct xfs_name *target_name), - TP_ARGS(src_dp, target_dp, src_name, target_name), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, src_dp_ino) - __field(xfs_ino_t, target_dp_ino) - __dynamic_array(char, src_name, src_name->len) - __dynamic_array(char, target_name, target_name->len) - ), - TP_fast_assign( - __entry->dev = VFS_I(src_dp)->i_sb->s_dev; - __entry->src_dp_ino = src_dp->i_ino; - __entry->target_dp_ino = target_dp->i_ino; - memcpy(__get_str(src_name), src_name->name, src_name->len); - memcpy(__get_str(target_name), target_name->name, target_name->len); - ), - TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx" - " src name %s target name %s", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->src_dp_ino, - __entry->target_dp_ino, - __get_str(src_name), - __get_str(target_name)) -) - -DECLARE_EVENT_CLASS(xfs_dquot_class, - TP_PROTO(struct xfs_dquot *dqp), - TP_ARGS(dqp), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(u32, id) - __field(unsigned, flags) - __field(unsigned, nrefs) - __field(unsigned long long, res_bcount) - __field(unsigned long long, bcount) - __field(unsigned long long, icount) - __field(unsigned long long, blk_hardlimit) - __field(unsigned long long, blk_softlimit) - __field(unsigned long long, ino_hardlimit) - __field(unsigned long long, ino_softlimit) - ), \ - TP_fast_assign( - __entry->dev = dqp->q_mount->m_super->s_dev; - __entry->id = be32_to_cpu(dqp->q_core.d_id); - __entry->flags = dqp->dq_flags; - __entry->nrefs = dqp->q_nrefs; - __entry->res_bcount = dqp->q_res_bcount; - __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount); - __entry->icount = be64_to_cpu(dqp->q_core.d_icount); - __entry->blk_hardlimit = - be64_to_cpu(dqp->q_core.d_blk_hardlimit); - __entry->blk_softlimit = - be64_to_cpu(dqp->q_core.d_blk_softlimit); - __entry->ino_hardlimit = - be64_to_cpu(dqp->q_core.d_ino_hardlimit); - __entry->ino_softlimit = - be64_to_cpu(dqp->q_core.d_ino_softlimit); - ), - TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx " - "bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx " - "icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->id, - __print_flags(__entry->flags, "|", XFS_DQ_FLAGS), - __entry->nrefs, - __entry->res_bcount, - __entry->bcount, - __entry->blk_hardlimit, - __entry->blk_softlimit, - __entry->icount, - __entry->ino_hardlimit, - __entry->ino_softlimit) -) - -#define DEFINE_DQUOT_EVENT(name) \ -DEFINE_EVENT(xfs_dquot_class, name, \ - TP_PROTO(struct xfs_dquot *dqp), \ - TP_ARGS(dqp)) -DEFINE_DQUOT_EVENT(xfs_dqadjust); -DEFINE_DQUOT_EVENT(xfs_dqreclaim_want); -DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty); -DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink); -DEFINE_DQUOT_EVENT(xfs_dqattach_found); -DEFINE_DQUOT_EVENT(xfs_dqattach_get); -DEFINE_DQUOT_EVENT(xfs_dqinit); -DEFINE_DQUOT_EVENT(xfs_dqreuse); -DEFINE_DQUOT_EVENT(xfs_dqalloc); -DEFINE_DQUOT_EVENT(xfs_dqtobp_read); -DEFINE_DQUOT_EVENT(xfs_dqread); -DEFINE_DQUOT_EVENT(xfs_dqread_fail); -DEFINE_DQUOT_EVENT(xfs_dqlookup_found); -DEFINE_DQUOT_EVENT(xfs_dqlookup_want); -DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist); -DEFINE_DQUOT_EVENT(xfs_dqlookup_done); -DEFINE_DQUOT_EVENT(xfs_dqget_hit); -DEFINE_DQUOT_EVENT(xfs_dqget_miss); -DEFINE_DQUOT_EVENT(xfs_dqput); -DEFINE_DQUOT_EVENT(xfs_dqput_wait); -DEFINE_DQUOT_EVENT(xfs_dqput_free); -DEFINE_DQUOT_EVENT(xfs_dqrele); -DEFINE_DQUOT_EVENT(xfs_dqflush); -DEFINE_DQUOT_EVENT(xfs_dqflush_force); -DEFINE_DQUOT_EVENT(xfs_dqflush_done); - -DECLARE_EVENT_CLASS(xfs_loggrant_class, - TP_PROTO(struct log *log, struct xlog_ticket *tic), - TP_ARGS(log, tic), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(unsigned, trans_type) - __field(char, ocnt) - __field(char, cnt) - __field(int, curr_res) - __field(int, unit_res) - __field(unsigned int, flags) - __field(int, reserveq) - __field(int, writeq) - __field(int, grant_reserve_cycle) - __field(int, grant_reserve_bytes) - __field(int, grant_write_cycle) - __field(int, grant_write_bytes) - __field(int, curr_cycle) - __field(int, curr_block) - __field(xfs_lsn_t, tail_lsn) - ), - TP_fast_assign( - __entry->dev = log->l_mp->m_super->s_dev; - __entry->trans_type = tic->t_trans_type; - __entry->ocnt = tic->t_ocnt; - __entry->cnt = tic->t_cnt; - __entry->curr_res = tic->t_curr_res; - __entry->unit_res = tic->t_unit_res; - __entry->flags = tic->t_flags; - __entry->reserveq = list_empty(&log->l_reserveq); - __entry->writeq = list_empty(&log->l_writeq); - xlog_crack_grant_head(&log->l_grant_reserve_head, - &__entry->grant_reserve_cycle, - &__entry->grant_reserve_bytes); - xlog_crack_grant_head(&log->l_grant_write_head, - &__entry->grant_write_cycle, - &__entry->grant_write_bytes); - __entry->curr_cycle = log->l_curr_cycle; - __entry->curr_block = log->l_curr_block; - __entry->tail_lsn = atomic64_read(&log->l_tail_lsn); - ), - TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " - "t_unit_res %u t_flags %s reserveq %s " - "writeq %s grant_reserve_cycle %d " - "grant_reserve_bytes %d grant_write_cycle %d " - "grant_write_bytes %d curr_cycle %d curr_block %d " - "tail_cycle %d tail_block %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES), - __entry->ocnt, - __entry->cnt, - __entry->curr_res, - __entry->unit_res, - __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), - __entry->reserveq ? "empty" : "active", - __entry->writeq ? "empty" : "active", - __entry->grant_reserve_cycle, - __entry->grant_reserve_bytes, - __entry->grant_write_cycle, - __entry->grant_write_bytes, - __entry->curr_cycle, - __entry->curr_block, - CYCLE_LSN(__entry->tail_lsn), - BLOCK_LSN(__entry->tail_lsn) - ) -) - -#define DEFINE_LOGGRANT_EVENT(name) \ -DEFINE_EVENT(xfs_loggrant_class, name, \ - TP_PROTO(struct log *log, struct xlog_ticket *tic), \ - TP_ARGS(log, tic)) -DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm); -DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); -DEFINE_LOGGRANT_EVENT(xfs_log_reserve); -DEFINE_LOGGRANT_EVENT(xfs_log_umount_write); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_error); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); -DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter); -DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit); -DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub); - -DECLARE_EVENT_CLASS(xfs_file_class, - TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), - TP_ARGS(ip, count, offset, flags), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(xfs_fsize_t, size) - __field(xfs_fsize_t, new_size) - __field(loff_t, offset) - __field(size_t, count) - __field(int, flags) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->size = ip->i_d.di_size; - __entry->new_size = ip->i_new_size; - __entry->offset = offset; - __entry->count = count; - __entry->flags = flags; - ), - TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " - "offset 0x%llx count 0x%zx ioflags %s", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->size, - __entry->new_size, - __entry->offset, - __entry->count, - __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) -) - -#define DEFINE_RW_EVENT(name) \ -DEFINE_EVENT(xfs_file_class, name, \ - TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \ - TP_ARGS(ip, count, offset, flags)) -DEFINE_RW_EVENT(xfs_file_read); -DEFINE_RW_EVENT(xfs_file_buffered_write); -DEFINE_RW_EVENT(xfs_file_direct_write); -DEFINE_RW_EVENT(xfs_file_splice_read); -DEFINE_RW_EVENT(xfs_file_splice_write); - -DECLARE_EVENT_CLASS(xfs_page_class, - TP_PROTO(struct inode *inode, struct page *page, unsigned long off), - TP_ARGS(inode, page, off), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(pgoff_t, pgoff) - __field(loff_t, size) - __field(unsigned long, offset) - __field(int, delalloc) - __field(int, unwritten) - ), - TP_fast_assign( - int delalloc = -1, unwritten = -1; - - if (page_has_buffers(page)) - xfs_count_page_state(page, &delalloc, &unwritten); - __entry->dev = inode->i_sb->s_dev; - __entry->ino = XFS_I(inode)->i_ino; - __entry->pgoff = page_offset(page); - __entry->size = i_size_read(inode); - __entry->offset = off; - __entry->delalloc = delalloc; - __entry->unwritten = unwritten; - ), - TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " - "delalloc %d unwritten %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->pgoff, - __entry->size, - __entry->offset, - __entry->delalloc, - __entry->unwritten) -) - -#define DEFINE_PAGE_EVENT(name) \ -DEFINE_EVENT(xfs_page_class, name, \ - TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \ - TP_ARGS(inode, page, off)) -DEFINE_PAGE_EVENT(xfs_writepage); -DEFINE_PAGE_EVENT(xfs_releasepage); -DEFINE_PAGE_EVENT(xfs_invalidatepage); - -DECLARE_EVENT_CLASS(xfs_imap_class, - TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, - int type, struct xfs_bmbt_irec *irec), - TP_ARGS(ip, offset, count, type, irec), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(loff_t, size) - __field(loff_t, new_size) - __field(loff_t, offset) - __field(size_t, count) - __field(int, type) - __field(xfs_fileoff_t, startoff) - __field(xfs_fsblock_t, startblock) - __field(xfs_filblks_t, blockcount) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->size = ip->i_d.di_size; - __entry->new_size = ip->i_new_size; - __entry->offset = offset; - __entry->count = count; - __entry->type = type; - __entry->startoff = irec ? irec->br_startoff : 0; - __entry->startblock = irec ? irec->br_startblock : 0; - __entry->blockcount = irec ? irec->br_blockcount : 0; - ), - TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " - "offset 0x%llx count %zd type %s " - "startoff 0x%llx startblock %lld blockcount 0x%llx", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->size, - __entry->new_size, - __entry->offset, - __entry->count, - __print_symbolic(__entry->type, XFS_IO_TYPES), - __entry->startoff, - (__int64_t)__entry->startblock, - __entry->blockcount) -) - -#define DEFINE_IOMAP_EVENT(name) \ -DEFINE_EVENT(xfs_imap_class, name, \ - TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ - int type, struct xfs_bmbt_irec *irec), \ - TP_ARGS(ip, offset, count, type, irec)) -DEFINE_IOMAP_EVENT(xfs_map_blocks_found); -DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); -DEFINE_IOMAP_EVENT(xfs_get_blocks_found); -DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); - -DECLARE_EVENT_CLASS(xfs_simple_io_class, - TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), - TP_ARGS(ip, offset, count), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(loff_t, isize) - __field(loff_t, disize) - __field(loff_t, new_size) - __field(loff_t, offset) - __field(size_t, count) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->isize = ip->i_size; - __entry->disize = ip->i_d.di_size; - __entry->new_size = ip->i_new_size; - __entry->offset = offset; - __entry->count = count; - ), - TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx " - "offset 0x%llx count %zd", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->isize, - __entry->disize, - __entry->new_size, - __entry->offset, - __entry->count) -); - -#define DEFINE_SIMPLE_IO_EVENT(name) \ -DEFINE_EVENT(xfs_simple_io_class, name, \ - TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \ - TP_ARGS(ip, offset, count)) -DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); -DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); -DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); -DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); - -DECLARE_EVENT_CLASS(xfs_itrunc_class, - TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), - TP_ARGS(ip, new_size), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(xfs_fsize_t, size) - __field(xfs_fsize_t, new_size) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->size = ip->i_d.di_size; - __entry->new_size = new_size; - ), - TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->size, - __entry->new_size) -) - -#define DEFINE_ITRUNC_EVENT(name) \ -DEFINE_EVENT(xfs_itrunc_class, name, \ - TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ - TP_ARGS(ip, new_size)) -DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start); -DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end); - -TRACE_EVENT(xfs_pagecache_inval, - TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish), - TP_ARGS(ip, start, finish), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(xfs_fsize_t, size) - __field(xfs_off_t, start) - __field(xfs_off_t, finish) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->size = ip->i_d.di_size; - __entry->start = start; - __entry->finish = finish; - ), - TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->size, - __entry->start, - __entry->finish) -); - -TRACE_EVENT(xfs_bunmap, - TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, - int flags, unsigned long caller_ip), - TP_ARGS(ip, bno, len, flags, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(xfs_fsize_t, size) - __field(xfs_fileoff_t, bno) - __field(xfs_filblks_t, len) - __field(unsigned long, caller_ip) - __field(int, flags) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->size = ip->i_d.di_size; - __entry->bno = bno; - __entry->len = len; - __entry->caller_ip = caller_ip; - __entry->flags = flags; - ), - TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx" - "flags %s caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->size, - __entry->bno, - __entry->len, - __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS), - (void *)__entry->caller_ip) - -); - -DECLARE_EVENT_CLASS(xfs_busy_class, - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, - xfs_agblock_t agbno, xfs_extlen_t len), - TP_ARGS(mp, agno, agbno, len), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_agnumber_t, agno) - __field(xfs_agblock_t, agbno) - __field(xfs_extlen_t, len) - ), - TP_fast_assign( - __entry->dev = mp->m_super->s_dev; - __entry->agno = agno; - __entry->agbno = agbno; - __entry->len = len; - ), - TP_printk("dev %d:%d agno %u agbno %u len %u", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->agno, - __entry->agbno, - __entry->len) -); -#define DEFINE_BUSY_EVENT(name) \ -DEFINE_EVENT(xfs_busy_class, name, \ - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ - xfs_agblock_t agbno, xfs_extlen_t len), \ - TP_ARGS(mp, agno, agbno, len)) -DEFINE_BUSY_EVENT(xfs_alloc_busy); -DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem); -DEFINE_BUSY_EVENT(xfs_alloc_busy_force); -DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse); -DEFINE_BUSY_EVENT(xfs_alloc_busy_clear); - -TRACE_EVENT(xfs_alloc_busy_trim, - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, - xfs_agblock_t agbno, xfs_extlen_t len, - xfs_agblock_t tbno, xfs_extlen_t tlen), - TP_ARGS(mp, agno, agbno, len, tbno, tlen), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_agnumber_t, agno) - __field(xfs_agblock_t, agbno) - __field(xfs_extlen_t, len) - __field(xfs_agblock_t, tbno) - __field(xfs_extlen_t, tlen) - ), - TP_fast_assign( - __entry->dev = mp->m_super->s_dev; - __entry->agno = agno; - __entry->agbno = agbno; - __entry->len = len; - __entry->tbno = tbno; - __entry->tlen = tlen; - ), - TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->agno, - __entry->agbno, - __entry->len, - __entry->tbno, - __entry->tlen) -); - -TRACE_EVENT(xfs_trans_commit_lsn, - TP_PROTO(struct xfs_trans *trans), - TP_ARGS(trans), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(struct xfs_trans *, tp) - __field(xfs_lsn_t, lsn) - ), - TP_fast_assign( - __entry->dev = trans->t_mountp->m_super->s_dev; - __entry->tp = trans; - __entry->lsn = trans->t_commit_lsn; - ), - TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->tp, - __entry->lsn) -); - -TRACE_EVENT(xfs_agf, - TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags, - unsigned long caller_ip), - TP_ARGS(mp, agf, flags, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_agnumber_t, agno) - __field(int, flags) - __field(__u32, length) - __field(__u32, bno_root) - __field(__u32, cnt_root) - __field(__u32, bno_level) - __field(__u32, cnt_level) - __field(__u32, flfirst) - __field(__u32, fllast) - __field(__u32, flcount) - __field(__u32, freeblks) - __field(__u32, longest) - __field(unsigned long, caller_ip) - ), - TP_fast_assign( - __entry->dev = mp->m_super->s_dev; - __entry->agno = be32_to_cpu(agf->agf_seqno), - __entry->flags = flags; - __entry->length = be32_to_cpu(agf->agf_length), - __entry->bno_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]), - __entry->cnt_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]), - __entry->bno_level = - be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]), - __entry->cnt_level = - be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]), - __entry->flfirst = be32_to_cpu(agf->agf_flfirst), - __entry->fllast = be32_to_cpu(agf->agf_fllast), - __entry->flcount = be32_to_cpu(agf->agf_flcount), - __entry->freeblks = be32_to_cpu(agf->agf_freeblks), - __entry->longest = be32_to_cpu(agf->agf_longest); - __entry->caller_ip = caller_ip; - ), - TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u " - "levels b %u c %u flfirst %u fllast %u flcount %u " - "freeblks %u longest %u caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->agno, - __print_flags(__entry->flags, "|", XFS_AGF_FLAGS), - __entry->length, - __entry->bno_root, - __entry->cnt_root, - __entry->bno_level, - __entry->cnt_level, - __entry->flfirst, - __entry->fllast, - __entry->flcount, - __entry->freeblks, - __entry->longest, - (void *)__entry->caller_ip) -); - -TRACE_EVENT(xfs_free_extent, - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, - xfs_extlen_t len, bool isfl, int haveleft, int haveright), - TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_agnumber_t, agno) - __field(xfs_agblock_t, agbno) - __field(xfs_extlen_t, len) - __field(int, isfl) - __field(int, haveleft) - __field(int, haveright) - ), - TP_fast_assign( - __entry->dev = mp->m_super->s_dev; - __entry->agno = agno; - __entry->agbno = agbno; - __entry->len = len; - __entry->isfl = isfl; - __entry->haveleft = haveleft; - __entry->haveright = haveright; - ), - TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->agno, - __entry->agbno, - __entry->len, - __entry->isfl, - __entry->haveleft ? - (__entry->haveright ? "both" : "left") : - (__entry->haveright ? "right" : "none")) - -); - -DECLARE_EVENT_CLASS(xfs_alloc_class, - TP_PROTO(struct xfs_alloc_arg *args), - TP_ARGS(args), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_agnumber_t, agno) - __field(xfs_agblock_t, agbno) - __field(xfs_extlen_t, minlen) - __field(xfs_extlen_t, maxlen) - __field(xfs_extlen_t, mod) - __field(xfs_extlen_t, prod) - __field(xfs_extlen_t, minleft) - __field(xfs_extlen_t, total) - __field(xfs_extlen_t, alignment) - __field(xfs_extlen_t, minalignslop) - __field(xfs_extlen_t, len) - __field(short, type) - __field(short, otype) - __field(char, wasdel) - __field(char, wasfromfl) - __field(char, isfl) - __field(char, userdata) - __field(xfs_fsblock_t, firstblock) - ), - TP_fast_assign( - __entry->dev = args->mp->m_super->s_dev; - __entry->agno = args->agno; - __entry->agbno = args->agbno; - __entry->minlen = args->minlen; - __entry->maxlen = args->maxlen; - __entry->mod = args->mod; - __entry->prod = args->prod; - __entry->minleft = args->minleft; - __entry->total = args->total; - __entry->alignment = args->alignment; - __entry->minalignslop = args->minalignslop; - __entry->len = args->len; - __entry->type = args->type; - __entry->otype = args->otype; - __entry->wasdel = args->wasdel; - __entry->wasfromfl = args->wasfromfl; - __entry->isfl = args->isfl; - __entry->userdata = args->userdata; - __entry->firstblock = args->firstblock; - ), - TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u " - "prod %u minleft %u total %u alignment %u minalignslop %u " - "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d " - "userdata %d firstblock 0x%llx", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->agno, - __entry->agbno, - __entry->minlen, - __entry->maxlen, - __entry->mod, - __entry->prod, - __entry->minleft, - __entry->total, - __entry->alignment, - __entry->minalignslop, - __entry->len, - __print_symbolic(__entry->type, XFS_ALLOC_TYPES), - __print_symbolic(__entry->otype, XFS_ALLOC_TYPES), - __entry->wasdel, - __entry->wasfromfl, - __entry->isfl, - __entry->userdata, - (unsigned long long)__entry->firstblock) -) - -#define DEFINE_ALLOC_EVENT(name) \ -DEFINE_EVENT(xfs_alloc_class, name, \ - TP_PROTO(struct xfs_alloc_arg *args), \ - TP_ARGS(args)) -DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); -DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound); -DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); -DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); -DEFINE_ALLOC_EVENT(xfs_alloc_near_first); -DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); -DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); -DEFINE_ALLOC_EVENT(xfs_alloc_near_error); -DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry); -DEFINE_ALLOC_EVENT(xfs_alloc_near_busy); -DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); -DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); -DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); -DEFINE_ALLOC_EVENT(xfs_alloc_size_done); -DEFINE_ALLOC_EVENT(xfs_alloc_size_error); -DEFINE_ALLOC_EVENT(xfs_alloc_size_busy); -DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); -DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); -DEFINE_ALLOC_EVENT(xfs_alloc_small_done); -DEFINE_ALLOC_EVENT(xfs_alloc_small_error); -DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs); -DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix); -DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp); -DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); -DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed); - -DECLARE_EVENT_CLASS(xfs_dir2_class, - TP_PROTO(struct xfs_da_args *args), - TP_ARGS(args), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __dynamic_array(char, name, args->namelen) - __field(int, namelen) - __field(xfs_dahash_t, hashval) - __field(xfs_ino_t, inumber) - __field(int, op_flags) - ), - TP_fast_assign( - __entry->dev = VFS_I(args->dp)->i_sb->s_dev; - __entry->ino = args->dp->i_ino; - if (args->namelen) - memcpy(__get_str(name), args->name, args->namelen); - __entry->namelen = args->namelen; - __entry->hashval = args->hashval; - __entry->inumber = args->inumber; - __entry->op_flags = args->op_flags; - ), - TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x " - "inumber 0x%llx op_flags %s", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->namelen, - __entry->namelen ? __get_str(name) : NULL, - __entry->namelen, - __entry->hashval, - __entry->inumber, - __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS)) -) - -#define DEFINE_DIR2_EVENT(name) \ -DEFINE_EVENT(xfs_dir2_class, name, \ - TP_PROTO(struct xfs_da_args *args), \ - TP_ARGS(args)) -DEFINE_DIR2_EVENT(xfs_dir2_sf_addname); -DEFINE_DIR2_EVENT(xfs_dir2_sf_create); -DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup); -DEFINE_DIR2_EVENT(xfs_dir2_sf_replace); -DEFINE_DIR2_EVENT(xfs_dir2_sf_removename); -DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4); -DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8); -DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block); -DEFINE_DIR2_EVENT(xfs_dir2_block_addname); -DEFINE_DIR2_EVENT(xfs_dir2_block_lookup); -DEFINE_DIR2_EVENT(xfs_dir2_block_replace); -DEFINE_DIR2_EVENT(xfs_dir2_block_removename); -DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf); -DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf); -DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname); -DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup); -DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace); -DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename); -DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block); -DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node); -DEFINE_DIR2_EVENT(xfs_dir2_node_addname); -DEFINE_DIR2_EVENT(xfs_dir2_node_lookup); -DEFINE_DIR2_EVENT(xfs_dir2_node_replace); -DEFINE_DIR2_EVENT(xfs_dir2_node_removename); -DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf); - -DECLARE_EVENT_CLASS(xfs_dir2_space_class, - TP_PROTO(struct xfs_da_args *args, int idx), - TP_ARGS(args, idx), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(int, op_flags) - __field(int, idx) - ), - TP_fast_assign( - __entry->dev = VFS_I(args->dp)->i_sb->s_dev; - __entry->ino = args->dp->i_ino; - __entry->op_flags = args->op_flags; - __entry->idx = idx; - ), - TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), - __entry->idx) -) - -#define DEFINE_DIR2_SPACE_EVENT(name) \ -DEFINE_EVENT(xfs_dir2_space_class, name, \ - TP_PROTO(struct xfs_da_args *args, int idx), \ - TP_ARGS(args, idx)) -DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add); -DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove); -DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode); -DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode); - -TRACE_EVENT(xfs_dir2_leafn_moveents, - TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count), - TP_ARGS(args, src_idx, dst_idx, count), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(int, op_flags) - __field(int, src_idx) - __field(int, dst_idx) - __field(int, count) - ), - TP_fast_assign( - __entry->dev = VFS_I(args->dp)->i_sb->s_dev; - __entry->ino = args->dp->i_ino; - __entry->op_flags = args->op_flags; - __entry->src_idx = src_idx; - __entry->dst_idx = dst_idx; - __entry->count = count; - ), - TP_printk("dev %d:%d ino 0x%llx op_flags %s " - "src_idx %d dst_idx %d count %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), - __entry->src_idx, - __entry->dst_idx, - __entry->count) -); - -#define XFS_SWAPEXT_INODES \ - { 0, "target" }, \ - { 1, "temp" } - -#define XFS_INODE_FORMAT_STR \ - { 0, "invalid" }, \ - { 1, "local" }, \ - { 2, "extent" }, \ - { 3, "btree" } - -DECLARE_EVENT_CLASS(xfs_swap_extent_class, - TP_PROTO(struct xfs_inode *ip, int which), - TP_ARGS(ip, which), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(int, which) - __field(xfs_ino_t, ino) - __field(int, format) - __field(int, nex) - __field(int, max_nex) - __field(int, broot_size) - __field(int, fork_off) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->which = which; - __entry->ino = ip->i_ino; - __entry->format = ip->i_d.di_format; - __entry->nex = ip->i_d.di_nextents; - __entry->max_nex = ip->i_df.if_ext_max; - __entry->broot_size = ip->i_df.if_broot_bytes; - __entry->fork_off = XFS_IFORK_BOFF(ip); - ), - TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, " - "Max in-fork extents %d, broot size %d, fork offset %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __print_symbolic(__entry->which, XFS_SWAPEXT_INODES), - __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR), - __entry->nex, - __entry->max_nex, - __entry->broot_size, - __entry->fork_off) -) - -#define DEFINE_SWAPEXT_EVENT(name) \ -DEFINE_EVENT(xfs_swap_extent_class, name, \ - TP_PROTO(struct xfs_inode *ip, int which), \ - TP_ARGS(ip, which)) - -DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before); -DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after); - -DECLARE_EVENT_CLASS(xfs_log_recover_item_class, - TP_PROTO(struct log *log, struct xlog_recover *trans, - struct xlog_recover_item *item, int pass), - TP_ARGS(log, trans, item, pass), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(unsigned long, item) - __field(xlog_tid_t, tid) - __field(int, type) - __field(int, pass) - __field(int, count) - __field(int, total) - ), - TP_fast_assign( - __entry->dev = log->l_mp->m_super->s_dev; - __entry->item = (unsigned long)item; - __entry->tid = trans->r_log_tid; - __entry->type = ITEM_TYPE(item); - __entry->pass = pass; - __entry->count = item->ri_cnt; - __entry->total = item->ri_total; - ), - TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s " - "item region count/total %d/%d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->tid, - __entry->pass, - (void *)__entry->item, - __print_symbolic(__entry->type, XFS_LI_TYPE_DESC), - __entry->count, - __entry->total) -) - -#define DEFINE_LOG_RECOVER_ITEM(name) \ -DEFINE_EVENT(xfs_log_recover_item_class, name, \ - TP_PROTO(struct log *log, struct xlog_recover *trans, \ - struct xlog_recover_item *item, int pass), \ - TP_ARGS(log, trans, item, pass)) - -DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add); -DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont); -DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head); -DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail); -DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover); - -DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class, - TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), - TP_ARGS(log, buf_f), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(__int64_t, blkno) - __field(unsigned short, len) - __field(unsigned short, flags) - __field(unsigned short, size) - __field(unsigned int, map_size) - ), - TP_fast_assign( - __entry->dev = log->l_mp->m_super->s_dev; - __entry->blkno = buf_f->blf_blkno; - __entry->len = buf_f->blf_len; - __entry->flags = buf_f->blf_flags; - __entry->size = buf_f->blf_size; - __entry->map_size = buf_f->blf_map_size; - ), - TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, " - "map_size %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->blkno, - __entry->len, - __entry->flags, - __entry->size, - __entry->map_size) -) - -#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \ -DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \ - TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \ - TP_ARGS(log, buf_f)) - -DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel); -DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel); -DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add); -DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc); -DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover); -DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf); -DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf); -DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf); - -DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class, - TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), - TP_ARGS(log, in_f), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(unsigned short, size) - __field(int, fields) - __field(unsigned short, asize) - __field(unsigned short, dsize) - __field(__int64_t, blkno) - __field(int, len) - __field(int, boffset) - ), - TP_fast_assign( - __entry->dev = log->l_mp->m_super->s_dev; - __entry->ino = in_f->ilf_ino; - __entry->size = in_f->ilf_size; - __entry->fields = in_f->ilf_fields; - __entry->asize = in_f->ilf_asize; - __entry->dsize = in_f->ilf_dsize; - __entry->blkno = in_f->ilf_blkno; - __entry->len = in_f->ilf_len; - __entry->boffset = in_f->ilf_boffset; - ), - TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, " - "dsize %d, blkno 0x%llx, len %d, boffset %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->size, - __entry->fields, - __entry->asize, - __entry->dsize, - __entry->blkno, - __entry->len, - __entry->boffset) -) -#define DEFINE_LOG_RECOVER_INO_ITEM(name) \ -DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \ - TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \ - TP_ARGS(log, in_f)) - -DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover); -DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); -DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); - -DECLARE_EVENT_CLASS(xfs_discard_class, - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, - xfs_agblock_t agbno, xfs_extlen_t len), - TP_ARGS(mp, agno, agbno, len), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_agnumber_t, agno) - __field(xfs_agblock_t, agbno) - __field(xfs_extlen_t, len) - ), - TP_fast_assign( - __entry->dev = mp->m_super->s_dev; - __entry->agno = agno; - __entry->agbno = agbno; - __entry->len = len; - ), - TP_printk("dev %d:%d agno %u agbno %u len %u\n", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->agno, - __entry->agbno, - __entry->len) -) - -#define DEFINE_DISCARD_EVENT(name) \ -DEFINE_EVENT(xfs_discard_class, name, \ - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ - xfs_agblock_t agbno, xfs_extlen_t len), \ - TP_ARGS(mp, agno, agbno, len)) -DEFINE_DISCARD_EVENT(xfs_discard_extent); -DEFINE_DISCARD_EVENT(xfs_discard_toosmall); -DEFINE_DISCARD_EVENT(xfs_discard_exclude); -DEFINE_DISCARD_EVENT(xfs_discard_busy); - -#endif /* _TRACE_XFS_H */ - -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE xfs_trace -#include diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h deleted file mode 100644 index 7c220b4..0000000 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_VNODE_H__ -#define __XFS_VNODE_H__ - -#include "xfs_fs.h" - -struct file; -struct xfs_inode; -struct xfs_iomap; -struct attrlist_cursor_kern; - -/* - * Return values for xfs_inactive. A return value of - * VN_INACTIVE_NOCACHE implies that the file system behavior - * has disassociated its state and bhv_desc_t from the vnode. - */ -#define VN_INACTIVE_CACHE 0 -#define VN_INACTIVE_NOCACHE 1 - -/* - * Flags for read/write calls - same values as IRIX - */ -#define IO_ISDIRECT 0x00004 /* bypass page cache */ -#define IO_INVIS 0x00020 /* don't update inode timestamps */ - -#define XFS_IO_FLAGS \ - { IO_ISDIRECT, "DIRECT" }, \ - { IO_INVIS, "INVIS"} - -/* - * Flush/Invalidate options for vop_toss/flush/flushinval_pages. - */ -#define FI_NONE 0 /* none */ -#define FI_REMAPF 1 /* Do a remapf prior to the operation */ -#define FI_REMAPF_LOCKED 2 /* Do a remapf prior to the operation. - Prevent VM access to the pages until - the operation completes. */ - -/* - * Some useful predicates. - */ -#define VN_MAPPED(vp) mapping_mapped(vp->i_mapping) -#define VN_CACHED(vp) (vp->i_mapping->nrpages) -#define VN_DIRTY(vp) mapping_tagged(vp->i_mapping, \ - PAGECACHE_TAG_DIRTY) - - -#endif /* __XFS_VNODE_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c deleted file mode 100644 index 87d3e03..0000000 --- a/fs/xfs/linux-2.6/xfs_xattr.c +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Copyright (C) 2008 Christoph Hellwig. - * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "xfs.h" -#include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_attr.h" -#include "xfs_attr_leaf.h" -#include "xfs_acl.h" -#include "xfs_vnodeops.h" - -#include -#include - - -static int -xfs_xattr_get(struct dentry *dentry, const char *name, - void *value, size_t size, int xflags) -{ - struct xfs_inode *ip = XFS_I(dentry->d_inode); - int error, asize = size; - - if (strcmp(name, "") == 0) - return -EINVAL; - - /* Convert Linux syscall to XFS internal ATTR flags */ - if (!size) { - xflags |= ATTR_KERNOVAL; - value = NULL; - } - - error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags); - if (error) - return error; - return asize; -} - -static int -xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags, int xflags) -{ - struct xfs_inode *ip = XFS_I(dentry->d_inode); - - if (strcmp(name, "") == 0) - return -EINVAL; - - /* Convert Linux syscall to XFS internal ATTR flags */ - if (flags & XATTR_CREATE) - xflags |= ATTR_CREATE; - if (flags & XATTR_REPLACE) - xflags |= ATTR_REPLACE; - - if (!value) - return -xfs_attr_remove(ip, (unsigned char *)name, xflags); - return -xfs_attr_set(ip, (unsigned char *)name, - (void *)value, size, xflags); -} - -static const struct xattr_handler xfs_xattr_user_handler = { - .prefix = XATTR_USER_PREFIX, - .flags = 0, /* no flags implies user namespace */ - .get = xfs_xattr_get, - .set = xfs_xattr_set, -}; - -static const struct xattr_handler xfs_xattr_trusted_handler = { - .prefix = XATTR_TRUSTED_PREFIX, - .flags = ATTR_ROOT, - .get = xfs_xattr_get, - .set = xfs_xattr_set, -}; - -static const struct xattr_handler xfs_xattr_security_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .flags = ATTR_SECURE, - .get = xfs_xattr_get, - .set = xfs_xattr_set, -}; - -const struct xattr_handler *xfs_xattr_handlers[] = { - &xfs_xattr_user_handler, - &xfs_xattr_trusted_handler, - &xfs_xattr_security_handler, -#ifdef CONFIG_XFS_POSIX_ACL - &xfs_xattr_acl_access_handler, - &xfs_xattr_acl_default_handler, -#endif - NULL -}; - -static unsigned int xfs_xattr_prefix_len(int flags) -{ - if (flags & XFS_ATTR_SECURE) - return sizeof("security"); - else if (flags & XFS_ATTR_ROOT) - return sizeof("trusted"); - else - return sizeof("user"); -} - -static const char *xfs_xattr_prefix(int flags) -{ - if (flags & XFS_ATTR_SECURE) - return xfs_xattr_security_handler.prefix; - else if (flags & XFS_ATTR_ROOT) - return xfs_xattr_trusted_handler.prefix; - else - return xfs_xattr_user_handler.prefix; -} - -static int -xfs_xattr_put_listent( - struct xfs_attr_list_context *context, - int flags, - unsigned char *name, - int namelen, - int valuelen, - unsigned char *value) -{ - unsigned int prefix_len = xfs_xattr_prefix_len(flags); - char *offset; - int arraytop; - - ASSERT(context->count >= 0); - - /* - * Only show root namespace entries if we are actually allowed to - * see them. - */ - if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN)) - return 0; - - arraytop = context->count + prefix_len + namelen + 1; - if (arraytop > context->firstu) { - context->count = -1; /* insufficient space */ - return 1; - } - offset = (char *)context->alist + context->count; - strncpy(offset, xfs_xattr_prefix(flags), prefix_len); - offset += prefix_len; - strncpy(offset, (char *)name, namelen); /* real name */ - offset += namelen; - *offset = '\0'; - context->count += prefix_len + namelen + 1; - return 0; -} - -static int -xfs_xattr_put_listent_sizes( - struct xfs_attr_list_context *context, - int flags, - unsigned char *name, - int namelen, - int valuelen, - unsigned char *value) -{ - context->count += xfs_xattr_prefix_len(flags) + namelen + 1; - return 0; -} - -static int -list_one_attr(const char *name, const size_t len, void *data, - size_t size, ssize_t *result) -{ - char *p = data + *result; - - *result += len; - if (!size) - return 0; - if (*result > size) - return -ERANGE; - - strcpy(p, name); - return 0; -} - -ssize_t -xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) -{ - struct xfs_attr_list_context context; - struct attrlist_cursor_kern cursor = { 0 }; - struct inode *inode = dentry->d_inode; - int error; - - /* - * First read the regular on-disk attributes. - */ - memset(&context, 0, sizeof(context)); - context.dp = XFS_I(inode); - context.cursor = &cursor; - context.resynch = 1; - context.alist = data; - context.bufsize = size; - context.firstu = context.bufsize; - - if (size) - context.put_listent = xfs_xattr_put_listent; - else - context.put_listent = xfs_xattr_put_listent_sizes; - - xfs_attr_list_int(&context); - if (context.count < 0) - return -ERANGE; - - /* - * Then add the two synthetic ACL attributes. - */ - if (posix_acl_access_exists(inode)) { - error = list_one_attr(POSIX_ACL_XATTR_ACCESS, - strlen(POSIX_ACL_XATTR_ACCESS) + 1, - data, size, &context.count); - if (error) - return error; - } - - if (posix_acl_default_exists(inode)) { - error = list_one_attr(POSIX_ACL_XATTR_DEFAULT, - strlen(POSIX_ACL_XATTR_DEFAULT) + 1, - data, size, &context.count); - if (error) - return error; - } - - return context.count; -} diff --git a/fs/xfs/mrlock.h b/fs/xfs/mrlock.h new file mode 100644 index 0000000..ff6a198 --- /dev/null +++ b/fs/xfs/mrlock.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_SUPPORT_MRLOCK_H__ +#define __XFS_SUPPORT_MRLOCK_H__ + +#include + +typedef struct { + struct rw_semaphore mr_lock; +#ifdef DEBUG + int mr_writer; +#endif +} mrlock_t; + +#ifdef DEBUG +#define mrinit(mrp, name) \ + do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0) +#else +#define mrinit(mrp, name) \ + do { init_rwsem(&(mrp)->mr_lock); } while (0) +#endif + +#define mrlock_init(mrp, t,n,s) mrinit(mrp, n) +#define mrfree(mrp) do { } while (0) + +static inline void mraccess_nested(mrlock_t *mrp, int subclass) +{ + down_read_nested(&mrp->mr_lock, subclass); +} + +static inline void mrupdate_nested(mrlock_t *mrp, int subclass) +{ + down_write_nested(&mrp->mr_lock, subclass); +#ifdef DEBUG + mrp->mr_writer = 1; +#endif +} + +static inline int mrtryaccess(mrlock_t *mrp) +{ + return down_read_trylock(&mrp->mr_lock); +} + +static inline int mrtryupdate(mrlock_t *mrp) +{ + if (!down_write_trylock(&mrp->mr_lock)) + return 0; +#ifdef DEBUG + mrp->mr_writer = 1; +#endif + return 1; +} + +static inline void mrunlock_excl(mrlock_t *mrp) +{ +#ifdef DEBUG + mrp->mr_writer = 0; +#endif + up_write(&mrp->mr_lock); +} + +static inline void mrunlock_shared(mrlock_t *mrp) +{ + up_read(&mrp->mr_lock); +} + +static inline void mrdemote(mrlock_t *mrp) +{ +#ifdef DEBUG + mrp->mr_writer = 0; +#endif + downgrade_write(&mrp->mr_lock); +} + +#endif /* __XFS_SUPPORT_MRLOCK_H__ */ diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c deleted file mode 100644 index db62959..0000000 --- a/fs/xfs/quota/xfs_dquot.c +++ /dev/null @@ -1,1454 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_itable.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_trans_space.h" -#include "xfs_trans_priv.h" -#include "xfs_qm.h" -#include "xfs_trace.h" - - -/* - LOCK ORDER - - inode lock (ilock) - dquot hash-chain lock (hashlock) - xqm dquot freelist lock (freelistlock - mount's dquot list lock (mplistlock) - user dquot lock - lock ordering among dquots is based on the uid or gid - group dquot lock - similar to udquots. Between the two dquots, the udquot - has to be locked first. - pin lock - the dquot lock must be held to take this lock. - flush lock - ditto. -*/ - -#ifdef DEBUG -xfs_buftarg_t *xfs_dqerror_target; -int xfs_do_dqerror; -int xfs_dqreq_num; -int xfs_dqerror_mod = 33; -#endif - -static struct lock_class_key xfs_dquot_other_class; - -/* - * Allocate and initialize a dquot. We don't always allocate fresh memory; - * we try to reclaim a free dquot if the number of incore dquots are above - * a threshold. - * The only field inside the core that gets initialized at this point - * is the d_id field. The idea is to fill in the entire q_core - * when we read in the on disk dquot. - */ -STATIC xfs_dquot_t * -xfs_qm_dqinit( - xfs_mount_t *mp, - xfs_dqid_t id, - uint type) -{ - xfs_dquot_t *dqp; - boolean_t brandnewdquot; - - brandnewdquot = xfs_qm_dqalloc_incore(&dqp); - dqp->dq_flags = type; - dqp->q_core.d_id = cpu_to_be32(id); - dqp->q_mount = mp; - - /* - * No need to re-initialize these if this is a reclaimed dquot. - */ - if (brandnewdquot) { - INIT_LIST_HEAD(&dqp->q_freelist); - mutex_init(&dqp->q_qlock); - init_waitqueue_head(&dqp->q_pinwait); - - /* - * Because we want to use a counting completion, complete - * the flush completion once to allow a single access to - * the flush completion without blocking. - */ - init_completion(&dqp->q_flush); - complete(&dqp->q_flush); - - trace_xfs_dqinit(dqp); - } else { - /* - * Only the q_core portion was zeroed in dqreclaim_one(). - * So, we need to reset others. - */ - dqp->q_nrefs = 0; - dqp->q_blkno = 0; - INIT_LIST_HEAD(&dqp->q_mplist); - INIT_LIST_HEAD(&dqp->q_hashlist); - dqp->q_bufoffset = 0; - dqp->q_fileoffset = 0; - dqp->q_transp = NULL; - dqp->q_gdquot = NULL; - dqp->q_res_bcount = 0; - dqp->q_res_icount = 0; - dqp->q_res_rtbcount = 0; - atomic_set(&dqp->q_pincount, 0); - dqp->q_hash = NULL; - ASSERT(list_empty(&dqp->q_freelist)); - - trace_xfs_dqreuse(dqp); - } - - /* - * In either case we need to make sure group quotas have a different - * lock class than user quotas, to make sure lockdep knows we can - * locks of one of each at the same time. - */ - if (!(type & XFS_DQ_USER)) - lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); - - /* - * log item gets initialized later - */ - return (dqp); -} - -/* - * This is called to free all the memory associated with a dquot - */ -void -xfs_qm_dqdestroy( - xfs_dquot_t *dqp) -{ - ASSERT(list_empty(&dqp->q_freelist)); - - mutex_destroy(&dqp->q_qlock); - kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); - - atomic_dec(&xfs_Gqm->qm_totaldquots); -} - -/* - * This is what a 'fresh' dquot inside a dquot chunk looks like on disk. - */ -STATIC void -xfs_qm_dqinit_core( - xfs_dqid_t id, - uint type, - xfs_dqblk_t *d) -{ - /* - * Caller has zero'd the entire dquot 'chunk' already. - */ - d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); - d->dd_diskdq.d_version = XFS_DQUOT_VERSION; - d->dd_diskdq.d_id = cpu_to_be32(id); - d->dd_diskdq.d_flags = type; -} - -/* - * If default limits are in force, push them into the dquot now. - * We overwrite the dquot limits only if they are zero and this - * is not the root dquot. - */ -void -xfs_qm_adjust_dqlimits( - xfs_mount_t *mp, - xfs_disk_dquot_t *d) -{ - xfs_quotainfo_t *q = mp->m_quotainfo; - - ASSERT(d->d_id); - - if (q->qi_bsoftlimit && !d->d_blk_softlimit) - d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit); - if (q->qi_bhardlimit && !d->d_blk_hardlimit) - d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit); - if (q->qi_isoftlimit && !d->d_ino_softlimit) - d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit); - if (q->qi_ihardlimit && !d->d_ino_hardlimit) - d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit); - if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit) - d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit); - if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit) - d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit); -} - -/* - * Check the limits and timers of a dquot and start or reset timers - * if necessary. - * This gets called even when quota enforcement is OFF, which makes our - * life a little less complicated. (We just don't reject any quota - * reservations in that case, when enforcement is off). - * We also return 0 as the values of the timers in Q_GETQUOTA calls, when - * enforcement's off. - * In contrast, warnings are a little different in that they don't - * 'automatically' get started when limits get exceeded. They do - * get reset to zero, however, when we find the count to be under - * the soft limit (they are only ever set non-zero via userspace). - */ -void -xfs_qm_adjust_dqtimers( - xfs_mount_t *mp, - xfs_disk_dquot_t *d) -{ - ASSERT(d->d_id); - -#ifdef DEBUG - if (d->d_blk_hardlimit) - ASSERT(be64_to_cpu(d->d_blk_softlimit) <= - be64_to_cpu(d->d_blk_hardlimit)); - if (d->d_ino_hardlimit) - ASSERT(be64_to_cpu(d->d_ino_softlimit) <= - be64_to_cpu(d->d_ino_hardlimit)); - if (d->d_rtb_hardlimit) - ASSERT(be64_to_cpu(d->d_rtb_softlimit) <= - be64_to_cpu(d->d_rtb_hardlimit)); -#endif - - if (!d->d_btimer) { - if ((d->d_blk_softlimit && - (be64_to_cpu(d->d_bcount) >= - be64_to_cpu(d->d_blk_softlimit))) || - (d->d_blk_hardlimit && - (be64_to_cpu(d->d_bcount) >= - be64_to_cpu(d->d_blk_hardlimit)))) { - d->d_btimer = cpu_to_be32(get_seconds() + - mp->m_quotainfo->qi_btimelimit); - } else { - d->d_bwarns = 0; - } - } else { - if ((!d->d_blk_softlimit || - (be64_to_cpu(d->d_bcount) < - be64_to_cpu(d->d_blk_softlimit))) && - (!d->d_blk_hardlimit || - (be64_to_cpu(d->d_bcount) < - be64_to_cpu(d->d_blk_hardlimit)))) { - d->d_btimer = 0; - } - } - - if (!d->d_itimer) { - if ((d->d_ino_softlimit && - (be64_to_cpu(d->d_icount) >= - be64_to_cpu(d->d_ino_softlimit))) || - (d->d_ino_hardlimit && - (be64_to_cpu(d->d_icount) >= - be64_to_cpu(d->d_ino_hardlimit)))) { - d->d_itimer = cpu_to_be32(get_seconds() + - mp->m_quotainfo->qi_itimelimit); - } else { - d->d_iwarns = 0; - } - } else { - if ((!d->d_ino_softlimit || - (be64_to_cpu(d->d_icount) < - be64_to_cpu(d->d_ino_softlimit))) && - (!d->d_ino_hardlimit || - (be64_to_cpu(d->d_icount) < - be64_to_cpu(d->d_ino_hardlimit)))) { - d->d_itimer = 0; - } - } - - if (!d->d_rtbtimer) { - if ((d->d_rtb_softlimit && - (be64_to_cpu(d->d_rtbcount) >= - be64_to_cpu(d->d_rtb_softlimit))) || - (d->d_rtb_hardlimit && - (be64_to_cpu(d->d_rtbcount) >= - be64_to_cpu(d->d_rtb_hardlimit)))) { - d->d_rtbtimer = cpu_to_be32(get_seconds() + - mp->m_quotainfo->qi_rtbtimelimit); - } else { - d->d_rtbwarns = 0; - } - } else { - if ((!d->d_rtb_softlimit || - (be64_to_cpu(d->d_rtbcount) < - be64_to_cpu(d->d_rtb_softlimit))) && - (!d->d_rtb_hardlimit || - (be64_to_cpu(d->d_rtbcount) < - be64_to_cpu(d->d_rtb_hardlimit)))) { - d->d_rtbtimer = 0; - } - } -} - -/* - * initialize a buffer full of dquots and log the whole thing - */ -STATIC void -xfs_qm_init_dquot_blk( - xfs_trans_t *tp, - xfs_mount_t *mp, - xfs_dqid_t id, - uint type, - xfs_buf_t *bp) -{ - struct xfs_quotainfo *q = mp->m_quotainfo; - xfs_dqblk_t *d; - int curid, i; - - ASSERT(tp); - ASSERT(xfs_buf_islocked(bp)); - - d = bp->b_addr; - - /* - * ID of the first dquot in the block - id's are zero based. - */ - curid = id - (id % q->qi_dqperchunk); - ASSERT(curid >= 0); - memset(d, 0, BBTOB(q->qi_dqchunklen)); - for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) - xfs_qm_dqinit_core(curid, type, d); - xfs_trans_dquot_buf(tp, bp, - (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF : - ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF : - XFS_BLF_GDQUOT_BUF))); - xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); -} - - - -/* - * Allocate a block and fill it with dquots. - * This is called when the bmapi finds a hole. - */ -STATIC int -xfs_qm_dqalloc( - xfs_trans_t **tpp, - xfs_mount_t *mp, - xfs_dquot_t *dqp, - xfs_inode_t *quotip, - xfs_fileoff_t offset_fsb, - xfs_buf_t **O_bpp) -{ - xfs_fsblock_t firstblock; - xfs_bmap_free_t flist; - xfs_bmbt_irec_t map; - int nmaps, error, committed; - xfs_buf_t *bp; - xfs_trans_t *tp = *tpp; - - ASSERT(tp != NULL); - - trace_xfs_dqalloc(dqp); - - /* - * Initialize the bmap freelist prior to calling bmapi code. - */ - xfs_bmap_init(&flist, &firstblock); - xfs_ilock(quotip, XFS_ILOCK_EXCL); - /* - * Return if this type of quotas is turned off while we didn't - * have an inode lock - */ - if (XFS_IS_THIS_QUOTA_OFF(dqp)) { - xfs_iunlock(quotip, XFS_ILOCK_EXCL); - return (ESRCH); - } - - xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL); - nmaps = 1; - if ((error = xfs_bmapi(tp, quotip, - offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB, - XFS_BMAPI_METADATA | XFS_BMAPI_WRITE, - &firstblock, - XFS_QM_DQALLOC_SPACE_RES(mp), - &map, &nmaps, &flist))) { - goto error0; - } - ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); - ASSERT(nmaps == 1); - ASSERT((map.br_startblock != DELAYSTARTBLOCK) && - (map.br_startblock != HOLESTARTBLOCK)); - - /* - * Keep track of the blkno to save a lookup later - */ - dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); - - /* now we can just get the buffer (there's nothing to read yet) */ - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, - dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, - 0); - if (!bp || (error = xfs_buf_geterror(bp))) - goto error1; - /* - * Make a chunk of dquots out of this buffer and log - * the entire thing. - */ - xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id), - dqp->dq_flags & XFS_DQ_ALLTYPES, bp); - - /* - * xfs_bmap_finish() may commit the current transaction and - * start a second transaction if the freelist is not empty. - * - * Since we still want to modify this buffer, we need to - * ensure that the buffer is not released on commit of - * the first transaction and ensure the buffer is added to the - * second transaction. - * - * If there is only one transaction then don't stop the buffer - * from being released when it commits later on. - */ - - xfs_trans_bhold(tp, bp); - - if ((error = xfs_bmap_finish(tpp, &flist, &committed))) { - goto error1; - } - - if (committed) { - tp = *tpp; - xfs_trans_bjoin(tp, bp); - } else { - xfs_trans_bhold_release(tp, bp); - } - - *O_bpp = bp; - return 0; - - error1: - xfs_bmap_cancel(&flist); - error0: - xfs_iunlock(quotip, XFS_ILOCK_EXCL); - - return (error); -} - -/* - * Maps a dquot to the buffer containing its on-disk version. - * This returns a ptr to the buffer containing the on-disk dquot - * in the bpp param, and a ptr to the on-disk dquot within that buffer - */ -STATIC int -xfs_qm_dqtobp( - xfs_trans_t **tpp, - xfs_dquot_t *dqp, - xfs_disk_dquot_t **O_ddpp, - xfs_buf_t **O_bpp, - uint flags) -{ - xfs_bmbt_irec_t map; - int nmaps = 1, error; - xfs_buf_t *bp; - xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp); - xfs_mount_t *mp = dqp->q_mount; - xfs_disk_dquot_t *ddq; - xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); - xfs_trans_t *tp = (tpp ? *tpp : NULL); - - dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; - - xfs_ilock(quotip, XFS_ILOCK_SHARED); - if (XFS_IS_THIS_QUOTA_OFF(dqp)) { - /* - * Return if this type of quotas is turned off while we - * didn't have the quota inode lock. - */ - xfs_iunlock(quotip, XFS_ILOCK_SHARED); - return ESRCH; - } - - /* - * Find the block map; no allocations yet - */ - error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, - XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, - NULL, 0, &map, &nmaps, NULL); - - xfs_iunlock(quotip, XFS_ILOCK_SHARED); - if (error) - return error; - - ASSERT(nmaps == 1); - ASSERT(map.br_blockcount == 1); - - /* - * Offset of dquot in the (fixed sized) dquot chunk. - */ - dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * - sizeof(xfs_dqblk_t); - - ASSERT(map.br_startblock != DELAYSTARTBLOCK); - if (map.br_startblock == HOLESTARTBLOCK) { - /* - * We don't allocate unless we're asked to - */ - if (!(flags & XFS_QMOPT_DQALLOC)) - return ENOENT; - - ASSERT(tp); - error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, - dqp->q_fileoffset, &bp); - if (error) - return error; - tp = *tpp; - } else { - trace_xfs_dqtobp_read(dqp); - - /* - * store the blkno etc so that we don't have to do the - * mapping all the time - */ - dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); - - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, - dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, - 0, &bp); - if (error || !bp) - return XFS_ERROR(error); - } - - ASSERT(xfs_buf_islocked(bp)); - - /* - * calculate the location of the dquot inside the buffer. - */ - ddq = bp->b_addr + dqp->q_bufoffset; - - /* - * A simple sanity check in case we got a corrupted dquot... - */ - error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES, - flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN), - "dqtobp"); - if (error) { - if (!(flags & XFS_QMOPT_DQREPAIR)) { - xfs_trans_brelse(tp, bp); - return XFS_ERROR(EIO); - } - } - - *O_bpp = bp; - *O_ddpp = ddq; - - return (0); -} - - -/* - * Read in the ondisk dquot using dqtobp() then copy it to an incore version, - * and release the buffer immediately. - * - */ -/* ARGSUSED */ -STATIC int -xfs_qm_dqread( - xfs_trans_t **tpp, - xfs_dqid_t id, - xfs_dquot_t *dqp, /* dquot to get filled in */ - uint flags) -{ - xfs_disk_dquot_t *ddqp; - xfs_buf_t *bp; - int error; - xfs_trans_t *tp; - - ASSERT(tpp); - - trace_xfs_dqread(dqp); - - /* - * get a pointer to the on-disk dquot and the buffer containing it - * dqp already knows its own type (GROUP/USER). - */ - if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) { - return (error); - } - tp = *tpp; - - /* copy everything from disk dquot to the incore dquot */ - memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); - ASSERT(be32_to_cpu(dqp->q_core.d_id) == id); - xfs_qm_dquot_logitem_init(dqp); - - /* - * Reservation counters are defined as reservation plus current usage - * to avoid having to add every time. - */ - dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); - dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); - dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount); - - /* Mark the buf so that this will stay incore a little longer */ - XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF); - - /* - * We got the buffer with a xfs_trans_read_buf() (in dqtobp()) - * So we need to release with xfs_trans_brelse(). - * The strategy here is identical to that of inodes; we lock - * the dquot in xfs_qm_dqget() before making it accessible to - * others. This is because dquots, like inodes, need a good level of - * concurrency, and we don't want to take locks on the entire buffers - * for dquot accesses. - * Note also that the dquot buffer may even be dirty at this point, if - * this particular dquot was repaired. We still aren't afraid to - * brelse it because we have the changes incore. - */ - ASSERT(xfs_buf_islocked(bp)); - xfs_trans_brelse(tp, bp); - - return (error); -} - - -/* - * allocate an incore dquot from the kernel heap, - * and fill its core with quota information kept on disk. - * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk - * if it wasn't already allocated. - */ -STATIC int -xfs_qm_idtodq( - xfs_mount_t *mp, - xfs_dqid_t id, /* gid or uid, depending on type */ - uint type, /* UDQUOT or GDQUOT */ - uint flags, /* DQALLOC, DQREPAIR */ - xfs_dquot_t **O_dqpp)/* OUT : incore dquot, not locked */ -{ - xfs_dquot_t *dqp; - int error; - xfs_trans_t *tp; - int cancelflags=0; - - dqp = xfs_qm_dqinit(mp, id, type); - tp = NULL; - if (flags & XFS_QMOPT_DQALLOC) { - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); - error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp), - XFS_WRITE_LOG_RES(mp) + - BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + - 128, - 0, - XFS_TRANS_PERM_LOG_RES, - XFS_WRITE_LOG_COUNT); - if (error) { - cancelflags = 0; - goto error0; - } - cancelflags = XFS_TRANS_RELEASE_LOG_RES; - } - - /* - * Read it from disk; xfs_dqread() takes care of - * all the necessary initialization of dquot's fields (locks, etc) - */ - if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) { - /* - * This can happen if quotas got turned off (ESRCH), - * or if the dquot didn't exist on disk and we ask to - * allocate (ENOENT). - */ - trace_xfs_dqread_fail(dqp); - cancelflags |= XFS_TRANS_ABORT; - goto error0; - } - if (tp) { - if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) - goto error1; - } - - *O_dqpp = dqp; - return (0); - - error0: - ASSERT(error); - if (tp) - xfs_trans_cancel(tp, cancelflags); - error1: - xfs_qm_dqdestroy(dqp); - *O_dqpp = NULL; - return (error); -} - -/* - * Lookup a dquot in the incore dquot hashtable. We keep two separate - * hashtables for user and group dquots; and, these are global tables - * inside the XQM, not per-filesystem tables. - * The hash chain must be locked by caller, and it is left locked - * on return. Returning dquot is locked. - */ -STATIC int -xfs_qm_dqlookup( - xfs_mount_t *mp, - xfs_dqid_t id, - xfs_dqhash_t *qh, - xfs_dquot_t **O_dqpp) -{ - xfs_dquot_t *dqp; - uint flist_locked; - - ASSERT(mutex_is_locked(&qh->qh_lock)); - - flist_locked = B_FALSE; - - /* - * Traverse the hashchain looking for a match - */ - list_for_each_entry(dqp, &qh->qh_list, q_hashlist) { - /* - * We already have the hashlock. We don't need the - * dqlock to look at the id field of the dquot, since the - * id can't be modified without the hashlock anyway. - */ - if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) { - trace_xfs_dqlookup_found(dqp); - - /* - * All in core dquots must be on the dqlist of mp - */ - ASSERT(!list_empty(&dqp->q_mplist)); - - xfs_dqlock(dqp); - if (dqp->q_nrefs == 0) { - ASSERT(!list_empty(&dqp->q_freelist)); - if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) { - trace_xfs_dqlookup_want(dqp); - - /* - * We may have raced with dqreclaim_one() - * (and lost). So, flag that we don't - * want the dquot to be reclaimed. - */ - dqp->dq_flags |= XFS_DQ_WANT; - xfs_dqunlock(dqp); - mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); - xfs_dqlock(dqp); - dqp->dq_flags &= ~(XFS_DQ_WANT); - } - flist_locked = B_TRUE; - } - - /* - * id couldn't have changed; we had the hashlock all - * along - */ - ASSERT(be32_to_cpu(dqp->q_core.d_id) == id); - - if (flist_locked) { - if (dqp->q_nrefs != 0) { - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - flist_locked = B_FALSE; - } else { - /* take it off the freelist */ - trace_xfs_dqlookup_freelist(dqp); - list_del_init(&dqp->q_freelist); - xfs_Gqm->qm_dqfrlist_cnt--; - } - } - - XFS_DQHOLD(dqp); - - if (flist_locked) - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - /* - * move the dquot to the front of the hashchain - */ - ASSERT(mutex_is_locked(&qh->qh_lock)); - list_move(&dqp->q_hashlist, &qh->qh_list); - trace_xfs_dqlookup_done(dqp); - *O_dqpp = dqp; - return 0; - } - } - - *O_dqpp = NULL; - ASSERT(mutex_is_locked(&qh->qh_lock)); - return (1); -} - -/* - * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a - * a locked dquot, doing an allocation (if requested) as needed. - * When both an inode and an id are given, the inode's id takes precedence. - * That is, if the id changes while we don't hold the ilock inside this - * function, the new dquot is returned, not necessarily the one requested - * in the id argument. - */ -int -xfs_qm_dqget( - xfs_mount_t *mp, - xfs_inode_t *ip, /* locked inode (optional) */ - xfs_dqid_t id, /* uid/projid/gid depending on type */ - uint type, /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */ - uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */ - xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ -{ - xfs_dquot_t *dqp; - xfs_dqhash_t *h; - uint version; - int error; - - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || - (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) || - (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { - return (ESRCH); - } - h = XFS_DQ_HASH(mp, id, type); - -#ifdef DEBUG - if (xfs_do_dqerror) { - if ((xfs_dqerror_target == mp->m_ddev_targp) && - (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { - xfs_debug(mp, "Returning error in dqget"); - return (EIO); - } - } -#endif - - again: - -#ifdef DEBUG - ASSERT(type == XFS_DQ_USER || - type == XFS_DQ_PROJ || - type == XFS_DQ_GROUP); - if (ip) { - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - if (type == XFS_DQ_USER) - ASSERT(ip->i_udquot == NULL); - else - ASSERT(ip->i_gdquot == NULL); - } -#endif - mutex_lock(&h->qh_lock); - - /* - * Look in the cache (hashtable). - * The chain is kept locked during lookup. - */ - if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) { - XQM_STATS_INC(xqmstats.xs_qm_dqcachehits); - /* - * The dquot was found, moved to the front of the chain, - * taken off the freelist if it was on it, and locked - * at this point. Just unlock the hashchain and return. - */ - ASSERT(*O_dqpp); - ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp)); - mutex_unlock(&h->qh_lock); - trace_xfs_dqget_hit(*O_dqpp); - return (0); /* success */ - } - XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses); - - /* - * Dquot cache miss. We don't want to keep the inode lock across - * a (potential) disk read. Also we don't want to deal with the lock - * ordering between quotainode and this inode. OTOH, dropping the inode - * lock here means dealing with a chown that can happen before - * we re-acquire the lock. - */ - if (ip) - xfs_iunlock(ip, XFS_ILOCK_EXCL); - /* - * Save the hashchain version stamp, and unlock the chain, so that - * we don't keep the lock across a disk read - */ - version = h->qh_version; - mutex_unlock(&h->qh_lock); - - /* - * Allocate the dquot on the kernel heap, and read the ondisk - * portion off the disk. Also, do all the necessary initialization - * This can return ENOENT if dquot didn't exist on disk and we didn't - * ask it to allocate; ESRCH if quotas got turned off suddenly. - */ - if ((error = xfs_qm_idtodq(mp, id, type, - flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR| - XFS_QMOPT_DOWARN), - &dqp))) { - if (ip) - xfs_ilock(ip, XFS_ILOCK_EXCL); - return (error); - } - - /* - * See if this is mount code calling to look at the overall quota limits - * which are stored in the id == 0 user or group's dquot. - * Since we may not have done a quotacheck by this point, just return - * the dquot without attaching it to any hashtables, lists, etc, or even - * taking a reference. - * The caller must dqdestroy this once done. - */ - if (flags & XFS_QMOPT_DQSUSER) { - ASSERT(id == 0); - ASSERT(! ip); - goto dqret; - } - - /* - * Dquot lock comes after hashlock in the lock ordering - */ - if (ip) { - xfs_ilock(ip, XFS_ILOCK_EXCL); - - /* - * A dquot could be attached to this inode by now, since - * we had dropped the ilock. - */ - if (type == XFS_DQ_USER) { - if (!XFS_IS_UQUOTA_ON(mp)) { - /* inode stays locked on return */ - xfs_qm_dqdestroy(dqp); - return XFS_ERROR(ESRCH); - } - if (ip->i_udquot) { - xfs_qm_dqdestroy(dqp); - dqp = ip->i_udquot; - xfs_dqlock(dqp); - goto dqret; - } - } else { - if (!XFS_IS_OQUOTA_ON(mp)) { - /* inode stays locked on return */ - xfs_qm_dqdestroy(dqp); - return XFS_ERROR(ESRCH); - } - if (ip->i_gdquot) { - xfs_qm_dqdestroy(dqp); - dqp = ip->i_gdquot; - xfs_dqlock(dqp); - goto dqret; - } - } - } - - /* - * Hashlock comes after ilock in lock order - */ - mutex_lock(&h->qh_lock); - if (version != h->qh_version) { - xfs_dquot_t *tmpdqp; - /* - * Now, see if somebody else put the dquot in the - * hashtable before us. This can happen because we didn't - * keep the hashchain lock. We don't have to worry about - * lock order between the two dquots here since dqp isn't - * on any findable lists yet. - */ - if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) { - /* - * Duplicate found. Just throw away the new dquot - * and start over. - */ - xfs_qm_dqput(tmpdqp); - mutex_unlock(&h->qh_lock); - xfs_qm_dqdestroy(dqp); - XQM_STATS_INC(xqmstats.xs_qm_dquot_dups); - goto again; - } - } - - /* - * Put the dquot at the beginning of the hash-chain and mp's list - * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock .. - */ - ASSERT(mutex_is_locked(&h->qh_lock)); - dqp->q_hash = h; - list_add(&dqp->q_hashlist, &h->qh_list); - h->qh_version++; - - /* - * Attach this dquot to this filesystem's list of all dquots, - * kept inside the mount structure in m_quotainfo field - */ - mutex_lock(&mp->m_quotainfo->qi_dqlist_lock); - - /* - * We return a locked dquot to the caller, with a reference taken - */ - xfs_dqlock(dqp); - dqp->q_nrefs = 1; - - list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist); - mp->m_quotainfo->qi_dquots++; - mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); - mutex_unlock(&h->qh_lock); - dqret: - ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); - trace_xfs_dqget_miss(dqp); - *O_dqpp = dqp; - return (0); -} - - -/* - * Release a reference to the dquot (decrement ref-count) - * and unlock it. If there is a group quota attached to this - * dquot, carefully release that too without tripping over - * deadlocks'n'stuff. - */ -void -xfs_qm_dqput( - xfs_dquot_t *dqp) -{ - xfs_dquot_t *gdqp; - - ASSERT(dqp->q_nrefs > 0); - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - - trace_xfs_dqput(dqp); - - if (dqp->q_nrefs != 1) { - dqp->q_nrefs--; - xfs_dqunlock(dqp); - return; - } - - /* - * drop the dqlock and acquire the freelist and dqlock - * in the right order; but try to get it out-of-order first - */ - if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) { - trace_xfs_dqput_wait(dqp); - xfs_dqunlock(dqp); - mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); - xfs_dqlock(dqp); - } - - while (1) { - gdqp = NULL; - - /* We can't depend on nrefs being == 1 here */ - if (--dqp->q_nrefs == 0) { - trace_xfs_dqput_free(dqp); - - list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); - xfs_Gqm->qm_dqfrlist_cnt++; - - /* - * If we just added a udquot to the freelist, then - * we want to release the gdquot reference that - * it (probably) has. Otherwise it'll keep the - * gdquot from getting reclaimed. - */ - if ((gdqp = dqp->q_gdquot)) { - /* - * Avoid a recursive dqput call - */ - xfs_dqlock(gdqp); - dqp->q_gdquot = NULL; - } - } - xfs_dqunlock(dqp); - - /* - * If we had a group quota inside the user quota as a hint, - * release it now. - */ - if (! gdqp) - break; - dqp = gdqp; - } - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); -} - -/* - * Release a dquot. Flush it if dirty, then dqput() it. - * dquot must not be locked. - */ -void -xfs_qm_dqrele( - xfs_dquot_t *dqp) -{ - if (!dqp) - return; - - trace_xfs_dqrele(dqp); - - xfs_dqlock(dqp); - /* - * We don't care to flush it if the dquot is dirty here. - * That will create stutters that we want to avoid. - * Instead we do a delayed write when we try to reclaim - * a dirty dquot. Also xfs_sync will take part of the burden... - */ - xfs_qm_dqput(dqp); -} - -/* - * This is the dquot flushing I/O completion routine. It is called - * from interrupt level when the buffer containing the dquot is - * flushed to disk. It is responsible for removing the dquot logitem - * from the AIL if it has not been re-logged, and unlocking the dquot's - * flush lock. This behavior is very similar to that of inodes.. - */ -STATIC void -xfs_qm_dqflush_done( - struct xfs_buf *bp, - struct xfs_log_item *lip) -{ - xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip; - xfs_dquot_t *dqp = qip->qli_dquot; - struct xfs_ail *ailp = lip->li_ailp; - - /* - * We only want to pull the item from the AIL if its - * location in the log has not changed since we started the flush. - * Thus, we only bother if the dquot's lsn has - * not changed. First we check the lsn outside the lock - * since it's cheaper, and then we recheck while - * holding the lock before removing the dquot from the AIL. - */ - if ((lip->li_flags & XFS_LI_IN_AIL) && - lip->li_lsn == qip->qli_flush_lsn) { - - /* xfs_trans_ail_delete() drops the AIL lock. */ - spin_lock(&ailp->xa_lock); - if (lip->li_lsn == qip->qli_flush_lsn) - xfs_trans_ail_delete(ailp, lip); - else - spin_unlock(&ailp->xa_lock); - } - - /* - * Release the dq's flush lock since we're done with it. - */ - xfs_dqfunlock(dqp); -} - -/* - * Write a modified dquot to disk. - * The dquot must be locked and the flush lock too taken by caller. - * The flush lock will not be unlocked until the dquot reaches the disk, - * but the dquot is free to be unlocked and modified by the caller - * in the interim. Dquot is still locked on return. This behavior is - * identical to that of inodes. - */ -int -xfs_qm_dqflush( - xfs_dquot_t *dqp, - uint flags) -{ - struct xfs_mount *mp = dqp->q_mount; - struct xfs_buf *bp; - struct xfs_disk_dquot *ddqp; - int error; - - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - ASSERT(!completion_done(&dqp->q_flush)); - - trace_xfs_dqflush(dqp); - - /* - * If not dirty, or it's pinned and we are not supposed to block, nada. - */ - if (!XFS_DQ_IS_DIRTY(dqp) || - (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) { - xfs_dqfunlock(dqp); - return 0; - } - xfs_qm_dqunpin_wait(dqp); - - /* - * This may have been unpinned because the filesystem is shutting - * down forcibly. If that's the case we must not write this dquot - * to disk, because the log record didn't make it to disk! - */ - if (XFS_FORCED_SHUTDOWN(mp)) { - dqp->dq_flags &= ~XFS_DQ_DIRTY; - xfs_dqfunlock(dqp); - return XFS_ERROR(EIO); - } - - /* - * Get the buffer containing the on-disk dquot - */ - error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, 0, &bp); - if (error) { - ASSERT(error != ENOENT); - xfs_dqfunlock(dqp); - return error; - } - - /* - * Calculate the location of the dquot inside the buffer. - */ - ddqp = bp->b_addr + dqp->q_bufoffset; - - /* - * A simple sanity check in case we got a corrupted dquot.. - */ - error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, - XFS_QMOPT_DOWARN, "dqflush (incore copy)"); - if (error) { - xfs_buf_relse(bp); - xfs_dqfunlock(dqp); - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - return XFS_ERROR(EIO); - } - - /* This is the only portion of data that needs to persist */ - memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t)); - - /* - * Clear the dirty field and remember the flush lsn for later use. - */ - dqp->dq_flags &= ~XFS_DQ_DIRTY; - - xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, - &dqp->q_logitem.qli_item.li_lsn); - - /* - * Attach an iodone routine so that we can remove this dquot from the - * AIL and release the flush lock once the dquot is synced to disk. - */ - xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done, - &dqp->q_logitem.qli_item); - - /* - * If the buffer is pinned then push on the log so we won't - * get stuck waiting in the write for too long. - */ - if (xfs_buf_ispinned(bp)) { - trace_xfs_dqflush_force(dqp); - xfs_log_force(mp, 0); - } - - if (flags & SYNC_WAIT) - error = xfs_bwrite(mp, bp); - else - xfs_bdwrite(mp, bp); - - trace_xfs_dqflush_done(dqp); - - /* - * dqp is still locked, but caller is free to unlock it now. - */ - return error; - -} - -int -xfs_qm_dqlock_nowait( - xfs_dquot_t *dqp) -{ - return mutex_trylock(&dqp->q_qlock); -} - -void -xfs_dqlock( - xfs_dquot_t *dqp) -{ - mutex_lock(&dqp->q_qlock); -} - -void -xfs_dqunlock( - xfs_dquot_t *dqp) -{ - mutex_unlock(&(dqp->q_qlock)); - if (dqp->q_logitem.qli_dquot == dqp) { - /* Once was dqp->q_mount, but might just have been cleared */ - xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp, - (xfs_log_item_t*)&(dqp->q_logitem)); - } -} - - -void -xfs_dqunlock_nonotify( - xfs_dquot_t *dqp) -{ - mutex_unlock(&(dqp->q_qlock)); -} - -/* - * Lock two xfs_dquot structures. - * - * To avoid deadlocks we always lock the quota structure with - * the lowerd id first. - */ -void -xfs_dqlock2( - xfs_dquot_t *d1, - xfs_dquot_t *d2) -{ - if (d1 && d2) { - ASSERT(d1 != d2); - if (be32_to_cpu(d1->q_core.d_id) > - be32_to_cpu(d2->q_core.d_id)) { - mutex_lock(&d2->q_qlock); - mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED); - } else { - mutex_lock(&d1->q_qlock); - mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED); - } - } else if (d1) { - mutex_lock(&d1->q_qlock); - } else if (d2) { - mutex_lock(&d2->q_qlock); - } -} - - -/* - * Take a dquot out of the mount's dqlist as well as the hashlist. - * This is called via unmount as well as quotaoff, and the purge - * will always succeed unless there are soft (temp) references - * outstanding. - * - * This returns 0 if it was purged, 1 if it wasn't. It's not an error code - * that we're returning! XXXsup - not cool. - */ -/* ARGSUSED */ -int -xfs_qm_dqpurge( - xfs_dquot_t *dqp) -{ - xfs_dqhash_t *qh = dqp->q_hash; - xfs_mount_t *mp = dqp->q_mount; - - ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock)); - ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock)); - - xfs_dqlock(dqp); - /* - * We really can't afford to purge a dquot that is - * referenced, because these are hard refs. - * It shouldn't happen in general because we went thru _all_ inodes in - * dqrele_all_inodes before calling this and didn't let the mountlock go. - * However it is possible that we have dquots with temporary - * references that are not attached to an inode. e.g. see xfs_setattr(). - */ - if (dqp->q_nrefs != 0) { - xfs_dqunlock(dqp); - mutex_unlock(&dqp->q_hash->qh_lock); - return (1); - } - - ASSERT(!list_empty(&dqp->q_freelist)); - - /* - * If we're turning off quotas, we have to make sure that, for - * example, we don't delete quota disk blocks while dquots are - * in the process of getting written to those disk blocks. - * This dquot might well be on AIL, and we can't leave it there - * if we're turning off quotas. Basically, we need this flush - * lock, and are willing to block on it. - */ - if (!xfs_dqflock_nowait(dqp)) { - /* - * Block on the flush lock after nudging dquot buffer, - * if it is incore. - */ - xfs_qm_dqflock_pushbuf_wait(dqp); - } - - /* - * XXXIf we're turning this type of quotas off, we don't care - * about the dirty metadata sitting in this dquot. OTOH, if - * we're unmounting, we do care, so we flush it and wait. - */ - if (XFS_DQ_IS_DIRTY(dqp)) { - int error; - - /* dqflush unlocks dqflock */ - /* - * Given that dqpurge is a very rare occurrence, it is OK - * that we're holding the hashlist and mplist locks - * across the disk write. But, ... XXXsup - * - * We don't care about getting disk errors here. We need - * to purge this dquot anyway, so we go ahead regardless. - */ - error = xfs_qm_dqflush(dqp, SYNC_WAIT); - if (error) - xfs_warn(mp, "%s: dquot %p flush failed", - __func__, dqp); - xfs_dqflock(dqp); - } - ASSERT(atomic_read(&dqp->q_pincount) == 0); - ASSERT(XFS_FORCED_SHUTDOWN(mp) || - !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); - - list_del_init(&dqp->q_hashlist); - qh->qh_version++; - list_del_init(&dqp->q_mplist); - mp->m_quotainfo->qi_dqreclaims++; - mp->m_quotainfo->qi_dquots--; - /* - * XXX Move this to the front of the freelist, if we can get the - * freelist lock. - */ - ASSERT(!list_empty(&dqp->q_freelist)); - - dqp->q_mount = NULL; - dqp->q_hash = NULL; - dqp->dq_flags = XFS_DQ_INACTIVE; - memset(&dqp->q_core, 0, sizeof(dqp->q_core)); - xfs_dqfunlock(dqp); - xfs_dqunlock(dqp); - mutex_unlock(&qh->qh_lock); - return (0); -} - - -/* - * Give the buffer a little push if it is incore and - * wait on the flush lock. - */ -void -xfs_qm_dqflock_pushbuf_wait( - xfs_dquot_t *dqp) -{ - xfs_mount_t *mp = dqp->q_mount; - xfs_buf_t *bp; - - /* - * Check to see if the dquot has been flushed delayed - * write. If so, grab its buffer and send it - * out immediately. We'll be able to acquire - * the flush lock when the I/O completes. - */ - bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); - if (!bp) - goto out_lock; - - if (XFS_BUF_ISDELAYWRITE(bp)) { - if (xfs_buf_ispinned(bp)) - xfs_log_force(mp, 0); - xfs_buf_delwri_promote(bp); - wake_up_process(bp->b_target->bt_task); - } - xfs_buf_relse(bp); -out_lock: - xfs_dqflock(dqp); -} diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h deleted file mode 100644 index 34b7e94..0000000 --- a/fs/xfs/quota/xfs_dquot.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DQUOT_H__ -#define __XFS_DQUOT_H__ - -/* - * Dquots are structures that hold quota information about a user or a group, - * much like inodes are for files. In fact, dquots share many characteristics - * with inodes. However, dquots can also be a centralized resource, relative - * to a collection of inodes. In this respect, dquots share some characteristics - * of the superblock. - * XFS dquots exploit both those in its algorithms. They make every attempt - * to not be a bottleneck when quotas are on and have minimal impact, if any, - * when quotas are off. - */ - -/* - * The hash chain headers (hash buckets) - */ -typedef struct xfs_dqhash { - struct list_head qh_list; - struct mutex qh_lock; - uint qh_version; /* ever increasing version */ - uint qh_nelems; /* number of dquots on the list */ -} xfs_dqhash_t; - -struct xfs_mount; -struct xfs_trans; - -/* - * The incore dquot structure - */ -typedef struct xfs_dquot { - uint dq_flags; /* various flags (XFS_DQ_*) */ - struct list_head q_freelist; /* global free list of dquots */ - struct list_head q_mplist; /* mount's list of dquots */ - struct list_head q_hashlist; /* gloabl hash list of dquots */ - xfs_dqhash_t *q_hash; /* the hashchain header */ - struct xfs_mount*q_mount; /* filesystem this relates to */ - struct xfs_trans*q_transp; /* trans this belongs to currently */ - uint q_nrefs; /* # active refs from inodes */ - xfs_daddr_t q_blkno; /* blkno of dquot buffer */ - int q_bufoffset; /* off of dq in buffer (# dquots) */ - xfs_fileoff_t q_fileoffset; /* offset in quotas file */ - - struct xfs_dquot*q_gdquot; /* group dquot, hint only */ - xfs_disk_dquot_t q_core; /* actual usage & quotas */ - xfs_dq_logitem_t q_logitem; /* dquot log item */ - xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */ - xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */ - xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ - struct mutex q_qlock; /* quota lock */ - struct completion q_flush; /* flush completion queue */ - atomic_t q_pincount; /* dquot pin count */ - wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ -} xfs_dquot_t; - -/* - * Lock hierarchy for q_qlock: - * XFS_QLOCK_NORMAL is the implicit default, - * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2 - */ -enum { - XFS_QLOCK_NORMAL = 0, - XFS_QLOCK_NESTED, -}; - -#define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++) - -/* - * Manage the q_flush completion queue embedded in the dquot. This completion - * queue synchronizes processes attempting to flush the in-core dquot back to - * disk. - */ -static inline void xfs_dqflock(xfs_dquot_t *dqp) -{ - wait_for_completion(&dqp->q_flush); -} - -static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp) -{ - return try_wait_for_completion(&dqp->q_flush); -} - -static inline void xfs_dqfunlock(xfs_dquot_t *dqp) -{ - complete(&dqp->q_flush); -} - -#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) -#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) -#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) -#define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ) -#define XFS_QM_ISGDQ(dqp) ((dqp)->dq_flags & XFS_DQ_GROUP) -#define XFS_DQ_TO_QINF(dqp) ((dqp)->q_mount->m_quotainfo) -#define XFS_DQ_TO_QIP(dqp) (XFS_QM_ISUDQ(dqp) ? \ - XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \ - XFS_DQ_TO_QINF(dqp)->qi_gquotaip) - -#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \ - (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ - (XFS_IS_OQUOTA_ON((d)->q_mount)))) - -extern void xfs_qm_dqdestroy(xfs_dquot_t *); -extern int xfs_qm_dqflush(xfs_dquot_t *, uint); -extern int xfs_qm_dqpurge(xfs_dquot_t *); -extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); -extern int xfs_qm_dqlock_nowait(xfs_dquot_t *); -extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp); -extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, - xfs_disk_dquot_t *); -extern void xfs_qm_adjust_dqlimits(xfs_mount_t *, - xfs_disk_dquot_t *); -extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, - xfs_dqid_t, uint, uint, xfs_dquot_t **); -extern void xfs_qm_dqput(xfs_dquot_t *); -extern void xfs_dqlock(xfs_dquot_t *); -extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *); -extern void xfs_dqunlock(xfs_dquot_t *); -extern void xfs_dqunlock_nonotify(xfs_dquot_t *); - -#endif /* __XFS_DQUOT_H__ */ diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c deleted file mode 100644 index 9e0e2fa..0000000 --- a/fs/xfs/quota/xfs_dquot_item.c +++ /dev/null @@ -1,529 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_itable.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_trans_priv.h" -#include "xfs_qm.h" - -static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip) -{ - return container_of(lip, struct xfs_dq_logitem, qli_item); -} - -/* - * returns the number of iovecs needed to log the given dquot item. - */ -STATIC uint -xfs_qm_dquot_logitem_size( - struct xfs_log_item *lip) -{ - /* - * we need only two iovecs, one for the format, one for the real thing - */ - return 2; -} - -/* - * fills in the vector of log iovecs for the given dquot log item. - */ -STATIC void -xfs_qm_dquot_logitem_format( - struct xfs_log_item *lip, - struct xfs_log_iovec *logvec) -{ - struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); - - logvec->i_addr = &qlip->qli_format; - logvec->i_len = sizeof(xfs_dq_logformat_t); - logvec->i_type = XLOG_REG_TYPE_QFORMAT; - logvec++; - logvec->i_addr = &qlip->qli_dquot->q_core; - logvec->i_len = sizeof(xfs_disk_dquot_t); - logvec->i_type = XLOG_REG_TYPE_DQUOT; - - ASSERT(2 == lip->li_desc->lid_size); - qlip->qli_format.qlf_size = 2; - -} - -/* - * Increment the pin count of the given dquot. - */ -STATIC void -xfs_qm_dquot_logitem_pin( - struct xfs_log_item *lip) -{ - struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - atomic_inc(&dqp->q_pincount); -} - -/* - * Decrement the pin count of the given dquot, and wake up - * anyone in xfs_dqwait_unpin() if the count goes to 0. The - * dquot must have been previously pinned with a call to - * xfs_qm_dquot_logitem_pin(). - */ -STATIC void -xfs_qm_dquot_logitem_unpin( - struct xfs_log_item *lip, - int remove) -{ - struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - - ASSERT(atomic_read(&dqp->q_pincount) > 0); - if (atomic_dec_and_test(&dqp->q_pincount)) - wake_up(&dqp->q_pinwait); -} - -/* - * Given the logitem, this writes the corresponding dquot entry to disk - * asynchronously. This is called with the dquot entry securely locked; - * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot - * at the end. - */ -STATIC void -xfs_qm_dquot_logitem_push( - struct xfs_log_item *lip) -{ - struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - int error; - - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - ASSERT(!completion_done(&dqp->q_flush)); - - /* - * Since we were able to lock the dquot's flush lock and - * we found it on the AIL, the dquot must be dirty. This - * is because the dquot is removed from the AIL while still - * holding the flush lock in xfs_dqflush_done(). Thus, if - * we found it in the AIL and were able to obtain the flush - * lock without sleeping, then there must not have been - * anyone in the process of flushing the dquot. - */ - error = xfs_qm_dqflush(dqp, 0); - if (error) - xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p", - __func__, error, dqp); - xfs_dqunlock(dqp); -} - -STATIC xfs_lsn_t -xfs_qm_dquot_logitem_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - /* - * We always re-log the entire dquot when it becomes dirty, - * so, the latest copy _is_ the only one that matters. - */ - return lsn; -} - -/* - * This is called to wait for the given dquot to be unpinned. - * Most of these pin/unpin routines are plagiarized from inode code. - */ -void -xfs_qm_dqunpin_wait( - struct xfs_dquot *dqp) -{ - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - if (atomic_read(&dqp->q_pincount) == 0) - return; - - /* - * Give the log a push so we don't wait here too long. - */ - xfs_log_force(dqp->q_mount, 0); - wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); -} - -/* - * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that - * the dquot is locked by us, but the flush lock isn't. So, here we are - * going to see if the relevant dquot buffer is incore, waiting on DELWRI. - * If so, we want to push it out to help us take this item off the AIL as soon - * as possible. - * - * We must not be holding the AIL lock at this point. Calling incore() to - * search the buffer cache can be a time consuming thing, and AIL lock is a - * spinlock. - */ -STATIC void -xfs_qm_dquot_logitem_pushbuf( - struct xfs_log_item *lip) -{ - struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); - struct xfs_dquot *dqp = qlip->qli_dquot; - struct xfs_buf *bp; - - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - - /* - * If flushlock isn't locked anymore, chances are that the - * inode flush completed and the inode was taken off the AIL. - * So, just get out. - */ - if (completion_done(&dqp->q_flush) || - !(lip->li_flags & XFS_LI_IN_AIL)) { - xfs_dqunlock(dqp); - return; - } - - bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno, - dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); - xfs_dqunlock(dqp); - if (!bp) - return; - if (XFS_BUF_ISDELAYWRITE(bp)) - xfs_buf_delwri_promote(bp); - xfs_buf_relse(bp); -} - -/* - * This is called to attempt to lock the dquot associated with this - * dquot log item. Don't sleep on the dquot lock or the flush lock. - * If the flush lock is already held, indicating that the dquot has - * been or is in the process of being flushed, then see if we can - * find the dquot's buffer in the buffer cache without sleeping. If - * we can and it is marked delayed write, then we want to send it out. - * We delay doing so until the push routine, though, to avoid sleeping - * in any device strategy routines. - */ -STATIC uint -xfs_qm_dquot_logitem_trylock( - struct xfs_log_item *lip) -{ - struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - - if (atomic_read(&dqp->q_pincount) > 0) - return XFS_ITEM_PINNED; - - if (!xfs_qm_dqlock_nowait(dqp)) - return XFS_ITEM_LOCKED; - - if (!xfs_dqflock_nowait(dqp)) { - /* - * dquot has already been flushed to the backing buffer, - * leave it locked, pushbuf routine will unlock it. - */ - return XFS_ITEM_PUSHBUF; - } - - ASSERT(lip->li_flags & XFS_LI_IN_AIL); - return XFS_ITEM_SUCCESS; -} - -/* - * Unlock the dquot associated with the log item. - * Clear the fields of the dquot and dquot log item that - * are specific to the current transaction. If the - * hold flags is set, do not unlock the dquot. - */ -STATIC void -xfs_qm_dquot_logitem_unlock( - struct xfs_log_item *lip) -{ - struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - - /* - * Clear the transaction pointer in the dquot - */ - dqp->q_transp = NULL; - - /* - * dquots are never 'held' from getting unlocked at the end of - * a transaction. Their locking and unlocking is hidden inside the - * transaction layer, within trans_commit. Hence, no LI_HOLD flag - * for the logitem. - */ - xfs_dqunlock(dqp); -} - -/* - * this needs to stamp an lsn into the dquot, I think. - * rpc's that look at user dquot's would then have to - * push on the dependency recorded in the dquot - */ -STATIC void -xfs_qm_dquot_logitem_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ -} - -/* - * This is the ops vector for dquots - */ -static struct xfs_item_ops xfs_dquot_item_ops = { - .iop_size = xfs_qm_dquot_logitem_size, - .iop_format = xfs_qm_dquot_logitem_format, - .iop_pin = xfs_qm_dquot_logitem_pin, - .iop_unpin = xfs_qm_dquot_logitem_unpin, - .iop_trylock = xfs_qm_dquot_logitem_trylock, - .iop_unlock = xfs_qm_dquot_logitem_unlock, - .iop_committed = xfs_qm_dquot_logitem_committed, - .iop_push = xfs_qm_dquot_logitem_push, - .iop_pushbuf = xfs_qm_dquot_logitem_pushbuf, - .iop_committing = xfs_qm_dquot_logitem_committing -}; - -/* - * Initialize the dquot log item for a newly allocated dquot. - * The dquot isn't locked at this point, but it isn't on any of the lists - * either, so we don't care. - */ -void -xfs_qm_dquot_logitem_init( - struct xfs_dquot *dqp) -{ - struct xfs_dq_logitem *lp = &dqp->q_logitem; - - xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT, - &xfs_dquot_item_ops); - lp->qli_dquot = dqp; - lp->qli_format.qlf_type = XFS_LI_DQUOT; - lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id); - lp->qli_format.qlf_blkno = dqp->q_blkno; - lp->qli_format.qlf_len = 1; - /* - * This is just the offset of this dquot within its buffer - * (which is currently 1 FSB and probably won't change). - * Hence 32 bits for this offset should be just fine. - * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t)) - * here, and recompute it at recovery time. - */ - lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset; -} - -/*------------------ QUOTAOFF LOG ITEMS -------------------*/ - -static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip) -{ - return container_of(lip, struct xfs_qoff_logitem, qql_item); -} - - -/* - * This returns the number of iovecs needed to log the given quotaoff item. - * We only need 1 iovec for an quotaoff item. It just logs the - * quotaoff_log_format structure. - */ -STATIC uint -xfs_qm_qoff_logitem_size( - struct xfs_log_item *lip) -{ - return 1; -} - -/* - * This is called to fill in the vector of log iovecs for the - * given quotaoff log item. We use only 1 iovec, and we point that - * at the quotaoff_log_format structure embedded in the quotaoff item. - * It is at this point that we assert that all of the extent - * slots in the quotaoff item have been filled. - */ -STATIC void -xfs_qm_qoff_logitem_format( - struct xfs_log_item *lip, - struct xfs_log_iovec *log_vector) -{ - struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip); - - ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF); - - log_vector->i_addr = &qflip->qql_format; - log_vector->i_len = sizeof(xfs_qoff_logitem_t); - log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF; - qflip->qql_format.qf_size = 1; -} - -/* - * Pinning has no meaning for an quotaoff item, so just return. - */ -STATIC void -xfs_qm_qoff_logitem_pin( - struct xfs_log_item *lip) -{ -} - -/* - * Since pinning has no meaning for an quotaoff item, unpinning does - * not either. - */ -STATIC void -xfs_qm_qoff_logitem_unpin( - struct xfs_log_item *lip, - int remove) -{ -} - -/* - * Quotaoff items have no locking, so just return success. - */ -STATIC uint -xfs_qm_qoff_logitem_trylock( - struct xfs_log_item *lip) -{ - return XFS_ITEM_LOCKED; -} - -/* - * Quotaoff items have no locking or pushing, so return failure - * so that the caller doesn't bother with us. - */ -STATIC void -xfs_qm_qoff_logitem_unlock( - struct xfs_log_item *lip) -{ -} - -/* - * The quotaoff-start-item is logged only once and cannot be moved in the log, - * so simply return the lsn at which it's been logged. - */ -STATIC xfs_lsn_t -xfs_qm_qoff_logitem_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - return lsn; -} - -/* - * There isn't much you can do to push on an quotaoff item. It is simply - * stuck waiting for the log to be flushed to disk. - */ -STATIC void -xfs_qm_qoff_logitem_push( - struct xfs_log_item *lip) -{ -} - - -STATIC xfs_lsn_t -xfs_qm_qoffend_logitem_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip); - struct xfs_qoff_logitem *qfs = qfe->qql_start_lip; - struct xfs_ail *ailp = qfs->qql_item.li_ailp; - - /* - * Delete the qoff-start logitem from the AIL. - * xfs_trans_ail_delete() drops the AIL lock. - */ - spin_lock(&ailp->xa_lock); - xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs); - - kmem_free(qfs); - kmem_free(qfe); - return (xfs_lsn_t)-1; -} - -/* - * XXX rcc - don't know quite what to do with this. I think we can - * just ignore it. The only time that isn't the case is if we allow - * the client to somehow see that quotas have been turned off in which - * we can't allow that to get back until the quotaoff hits the disk. - * So how would that happen? Also, do we need different routines for - * quotaoff start and quotaoff end? I suspect the answer is yes but - * to be sure, I need to look at the recovery code and see how quota off - * recovery is handled (do we roll forward or back or do something else). - * If we roll forwards or backwards, then we need two separate routines, - * one that does nothing and one that stamps in the lsn that matters - * (truly makes the quotaoff irrevocable). If we do something else, - * then maybe we don't need two. - */ -STATIC void -xfs_qm_qoff_logitem_committing( - struct xfs_log_item *lip, - xfs_lsn_t commit_lsn) -{ -} - -static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { - .iop_size = xfs_qm_qoff_logitem_size, - .iop_format = xfs_qm_qoff_logitem_format, - .iop_pin = xfs_qm_qoff_logitem_pin, - .iop_unpin = xfs_qm_qoff_logitem_unpin, - .iop_trylock = xfs_qm_qoff_logitem_trylock, - .iop_unlock = xfs_qm_qoff_logitem_unlock, - .iop_committed = xfs_qm_qoffend_logitem_committed, - .iop_push = xfs_qm_qoff_logitem_push, - .iop_committing = xfs_qm_qoff_logitem_committing -}; - -/* - * This is the ops vector shared by all quotaoff-start log items. - */ -static struct xfs_item_ops xfs_qm_qoff_logitem_ops = { - .iop_size = xfs_qm_qoff_logitem_size, - .iop_format = xfs_qm_qoff_logitem_format, - .iop_pin = xfs_qm_qoff_logitem_pin, - .iop_unpin = xfs_qm_qoff_logitem_unpin, - .iop_trylock = xfs_qm_qoff_logitem_trylock, - .iop_unlock = xfs_qm_qoff_logitem_unlock, - .iop_committed = xfs_qm_qoff_logitem_committed, - .iop_push = xfs_qm_qoff_logitem_push, - .iop_committing = xfs_qm_qoff_logitem_committing -}; - -/* - * Allocate and initialize an quotaoff item of the correct quota type(s). - */ -struct xfs_qoff_logitem * -xfs_qm_qoff_logitem_init( - struct xfs_mount *mp, - struct xfs_qoff_logitem *start, - uint flags) -{ - struct xfs_qoff_logitem *qf; - - qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP); - - xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ? - &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops); - qf->qql_item.li_mountp = mp; - qf->qql_format.qf_type = XFS_LI_QUOTAOFF; - qf->qql_format.qf_flags = flags; - qf->qql_start_lip = start; - return qf; -} diff --git a/fs/xfs/quota/xfs_dquot_item.h b/fs/xfs/quota/xfs_dquot_item.h deleted file mode 100644 index 5acae2a..0000000 --- a/fs/xfs/quota/xfs_dquot_item.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DQUOT_ITEM_H__ -#define __XFS_DQUOT_ITEM_H__ - -struct xfs_dquot; -struct xfs_trans; -struct xfs_mount; -struct xfs_qoff_logitem; - -typedef struct xfs_dq_logitem { - xfs_log_item_t qli_item; /* common portion */ - struct xfs_dquot *qli_dquot; /* dquot ptr */ - xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ - xfs_dq_logformat_t qli_format; /* logged structure */ -} xfs_dq_logitem_t; - -typedef struct xfs_qoff_logitem { - xfs_log_item_t qql_item; /* common portion */ - struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ - xfs_qoff_logformat_t qql_format; /* logged structure */ -} xfs_qoff_logitem_t; - - -extern void xfs_qm_dquot_logitem_init(struct xfs_dquot *); -extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *, - struct xfs_qoff_logitem *, uint); -extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *, - struct xfs_qoff_logitem *, uint); -extern void xfs_trans_log_quotaoff_item(struct xfs_trans *, - struct xfs_qoff_logitem *); - -#endif /* __XFS_DQUOT_ITEM_H__ */ diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c deleted file mode 100644 index 9a0aa76..0000000 --- a/fs/xfs/quota/xfs_qm.c +++ /dev/null @@ -1,2416 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_ialloc.h" -#include "xfs_itable.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_bmap.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_trans_space.h" -#include "xfs_utils.h" -#include "xfs_qm.h" -#include "xfs_trace.h" - -/* - * The global quota manager. There is only one of these for the entire - * system, _not_ one per file system. XQM keeps track of the overall - * quota functionality, including maintaining the freelist and hash - * tables of dquots. - */ -struct mutex xfs_Gqm_lock; -struct xfs_qm *xfs_Gqm; -uint ndquot; - -kmem_zone_t *qm_dqzone; -kmem_zone_t *qm_dqtrxzone; - -STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); -STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); - -STATIC int xfs_qm_init_quotainos(xfs_mount_t *); -STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); -STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); - -static struct shrinker xfs_qm_shaker = { - .shrink = xfs_qm_shake, - .seeks = DEFAULT_SEEKS, -}; - -/* - * Initialize the XQM structure. - * Note that there is not one quota manager per file system. - */ -STATIC struct xfs_qm * -xfs_Gqm_init(void) -{ - xfs_dqhash_t *udqhash, *gdqhash; - xfs_qm_t *xqm; - size_t hsize; - uint i; - - /* - * Initialize the dquot hash tables. - */ - udqhash = kmem_zalloc_greedy(&hsize, - XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t), - XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t)); - if (!udqhash) - goto out; - - gdqhash = kmem_zalloc_large(hsize); - if (!gdqhash) - goto out_free_udqhash; - - hsize /= sizeof(xfs_dqhash_t); - ndquot = hsize << 8; - - xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP); - xqm->qm_dqhashmask = hsize - 1; - xqm->qm_usr_dqhtable = udqhash; - xqm->qm_grp_dqhtable = gdqhash; - ASSERT(xqm->qm_usr_dqhtable != NULL); - ASSERT(xqm->qm_grp_dqhtable != NULL); - - for (i = 0; i < hsize; i++) { - xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i); - xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i); - } - - /* - * Freelist of all dquots of all file systems - */ - INIT_LIST_HEAD(&xqm->qm_dqfrlist); - xqm->qm_dqfrlist_cnt = 0; - mutex_init(&xqm->qm_dqfrlist_lock); - - /* - * dquot zone. we register our own low-memory callback. - */ - if (!qm_dqzone) { - xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t), - "xfs_dquots"); - qm_dqzone = xqm->qm_dqzone; - } else - xqm->qm_dqzone = qm_dqzone; - - register_shrinker(&xfs_qm_shaker); - - /* - * The t_dqinfo portion of transactions. - */ - if (!qm_dqtrxzone) { - xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t), - "xfs_dqtrx"); - qm_dqtrxzone = xqm->qm_dqtrxzone; - } else - xqm->qm_dqtrxzone = qm_dqtrxzone; - - atomic_set(&xqm->qm_totaldquots, 0); - xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO; - xqm->qm_nrefs = 0; - return xqm; - - out_free_udqhash: - kmem_free_large(udqhash); - out: - return NULL; -} - -/* - * Destroy the global quota manager when its reference count goes to zero. - */ -STATIC void -xfs_qm_destroy( - struct xfs_qm *xqm) -{ - struct xfs_dquot *dqp, *n; - int hsize, i; - - ASSERT(xqm != NULL); - ASSERT(xqm->qm_nrefs == 0); - unregister_shrinker(&xfs_qm_shaker); - hsize = xqm->qm_dqhashmask + 1; - for (i = 0; i < hsize; i++) { - xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); - xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); - } - kmem_free_large(xqm->qm_usr_dqhtable); - kmem_free_large(xqm->qm_grp_dqhtable); - xqm->qm_usr_dqhtable = NULL; - xqm->qm_grp_dqhtable = NULL; - xqm->qm_dqhashmask = 0; - - /* frlist cleanup */ - mutex_lock(&xqm->qm_dqfrlist_lock); - list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) { - xfs_dqlock(dqp); - list_del_init(&dqp->q_freelist); - xfs_Gqm->qm_dqfrlist_cnt--; - xfs_dqunlock(dqp); - xfs_qm_dqdestroy(dqp); - } - mutex_unlock(&xqm->qm_dqfrlist_lock); - mutex_destroy(&xqm->qm_dqfrlist_lock); - kmem_free(xqm); -} - -/* - * Called at mount time to let XQM know that another file system is - * starting quotas. This isn't crucial information as the individual mount - * structures are pretty independent, but it helps the XQM keep a - * global view of what's going on. - */ -/* ARGSUSED */ -STATIC int -xfs_qm_hold_quotafs_ref( - struct xfs_mount *mp) -{ - /* - * Need to lock the xfs_Gqm structure for things like this. For example, - * the structure could disappear between the entry to this routine and - * a HOLD operation if not locked. - */ - mutex_lock(&xfs_Gqm_lock); - - if (!xfs_Gqm) { - xfs_Gqm = xfs_Gqm_init(); - if (!xfs_Gqm) { - mutex_unlock(&xfs_Gqm_lock); - return ENOMEM; - } - } - - /* - * We can keep a list of all filesystems with quotas mounted for - * debugging and statistical purposes, but ... - * Just take a reference and get out. - */ - xfs_Gqm->qm_nrefs++; - mutex_unlock(&xfs_Gqm_lock); - - return 0; -} - - -/* - * Release the reference that a filesystem took at mount time, - * so that we know when we need to destroy the entire quota manager. - */ -/* ARGSUSED */ -STATIC void -xfs_qm_rele_quotafs_ref( - struct xfs_mount *mp) -{ - xfs_dquot_t *dqp, *n; - - ASSERT(xfs_Gqm); - ASSERT(xfs_Gqm->qm_nrefs > 0); - - /* - * Go thru the freelist and destroy all inactive dquots. - */ - mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); - - list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) { - xfs_dqlock(dqp); - if (dqp->dq_flags & XFS_DQ_INACTIVE) { - ASSERT(dqp->q_mount == NULL); - ASSERT(! XFS_DQ_IS_DIRTY(dqp)); - ASSERT(list_empty(&dqp->q_hashlist)); - ASSERT(list_empty(&dqp->q_mplist)); - list_del_init(&dqp->q_freelist); - xfs_Gqm->qm_dqfrlist_cnt--; - xfs_dqunlock(dqp); - xfs_qm_dqdestroy(dqp); - } else { - xfs_dqunlock(dqp); - } - } - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - - /* - * Destroy the entire XQM. If somebody mounts with quotaon, this'll - * be restarted. - */ - mutex_lock(&xfs_Gqm_lock); - if (--xfs_Gqm->qm_nrefs == 0) { - xfs_qm_destroy(xfs_Gqm); - xfs_Gqm = NULL; - } - mutex_unlock(&xfs_Gqm_lock); -} - -/* - * Just destroy the quotainfo structure. - */ -void -xfs_qm_unmount( - struct xfs_mount *mp) -{ - if (mp->m_quotainfo) { - xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL); - xfs_qm_destroy_quotainfo(mp); - } -} - - -/* - * This is called from xfs_mountfs to start quotas and initialize all - * necessary data structures like quotainfo. This is also responsible for - * running a quotacheck as necessary. We are guaranteed that the superblock - * is consistently read in at this point. - * - * If we fail here, the mount will continue with quota turned off. We don't - * need to inidicate success or failure at all. - */ -void -xfs_qm_mount_quotas( - xfs_mount_t *mp) -{ - int error = 0; - uint sbf; - - /* - * If quotas on realtime volumes is not supported, we disable - * quotas immediately. - */ - if (mp->m_sb.sb_rextents) { - xfs_notice(mp, "Cannot turn on quotas for realtime filesystem"); - mp->m_qflags = 0; - goto write_changes; - } - - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - - /* - * Allocate the quotainfo structure inside the mount struct, and - * create quotainode(s), and change/rev superblock if necessary. - */ - error = xfs_qm_init_quotainfo(mp); - if (error) { - /* - * We must turn off quotas. - */ - ASSERT(mp->m_quotainfo == NULL); - mp->m_qflags = 0; - goto write_changes; - } - /* - * If any of the quotas are not consistent, do a quotacheck. - */ - if (XFS_QM_NEED_QUOTACHECK(mp)) { - error = xfs_qm_quotacheck(mp); - if (error) { - /* Quotacheck failed and disabled quotas. */ - return; - } - } - /* - * If one type of quotas is off, then it will lose its - * quotachecked status, since we won't be doing accounting for - * that type anymore. - */ - if (!XFS_IS_UQUOTA_ON(mp)) - mp->m_qflags &= ~XFS_UQUOTA_CHKD; - if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) - mp->m_qflags &= ~XFS_OQUOTA_CHKD; - - write_changes: - /* - * We actually don't have to acquire the m_sb_lock at all. - * This can only be called from mount, and that's single threaded. XXX - */ - spin_lock(&mp->m_sb_lock); - sbf = mp->m_sb.sb_qflags; - mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL; - spin_unlock(&mp->m_sb_lock); - - if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { - if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) { - /* - * We could only have been turning quotas off. - * We aren't in very good shape actually because - * the incore structures are convinced that quotas are - * off, but the on disk superblock doesn't know that ! - */ - ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); - xfs_alert(mp, "%s: Superblock update failed!", - __func__); - } - } - - if (error) { - xfs_warn(mp, "Failed to initialize disk quotas."); - return; - } -} - -/* - * Called from the vfsops layer. - */ -void -xfs_qm_unmount_quotas( - xfs_mount_t *mp) -{ - /* - * Release the dquots that root inode, et al might be holding, - * before we flush quotas and blow away the quotainfo structure. - */ - ASSERT(mp->m_rootip); - xfs_qm_dqdetach(mp->m_rootip); - if (mp->m_rbmip) - xfs_qm_dqdetach(mp->m_rbmip); - if (mp->m_rsumip) - xfs_qm_dqdetach(mp->m_rsumip); - - /* - * Release the quota inodes. - */ - if (mp->m_quotainfo) { - if (mp->m_quotainfo->qi_uquotaip) { - IRELE(mp->m_quotainfo->qi_uquotaip); - mp->m_quotainfo->qi_uquotaip = NULL; - } - if (mp->m_quotainfo->qi_gquotaip) { - IRELE(mp->m_quotainfo->qi_gquotaip); - mp->m_quotainfo->qi_gquotaip = NULL; - } - } -} - -/* - * Flush all dquots of the given file system to disk. The dquots are - * _not_ purged from memory here, just their data written to disk. - */ -STATIC int -xfs_qm_dqflush_all( - struct xfs_mount *mp, - int sync_mode) -{ - struct xfs_quotainfo *q = mp->m_quotainfo; - int recl; - struct xfs_dquot *dqp; - int error; - - if (!q) - return 0; -again: - mutex_lock(&q->qi_dqlist_lock); - list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { - xfs_dqlock(dqp); - if (! XFS_DQ_IS_DIRTY(dqp)) { - xfs_dqunlock(dqp); - continue; - } - - /* XXX a sentinel would be better */ - recl = q->qi_dqreclaims; - if (!xfs_dqflock_nowait(dqp)) { - /* - * If we can't grab the flush lock then check - * to see if the dquot has been flushed delayed - * write. If so, grab its buffer and send it - * out immediately. We'll be able to acquire - * the flush lock when the I/O completes. - */ - xfs_qm_dqflock_pushbuf_wait(dqp); - } - /* - * Let go of the mplist lock. We don't want to hold it - * across a disk write. - */ - mutex_unlock(&q->qi_dqlist_lock); - error = xfs_qm_dqflush(dqp, sync_mode); - xfs_dqunlock(dqp); - if (error) - return error; - - mutex_lock(&q->qi_dqlist_lock); - if (recl != q->qi_dqreclaims) { - mutex_unlock(&q->qi_dqlist_lock); - /* XXX restart limit */ - goto again; - } - } - - mutex_unlock(&q->qi_dqlist_lock); - /* return ! busy */ - return 0; -} -/* - * Release the group dquot pointers the user dquots may be - * carrying around as a hint. mplist is locked on entry and exit. - */ -STATIC void -xfs_qm_detach_gdquots( - struct xfs_mount *mp) -{ - struct xfs_quotainfo *q = mp->m_quotainfo; - struct xfs_dquot *dqp, *gdqp; - int nrecl; - - again: - ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); - list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { - xfs_dqlock(dqp); - if ((gdqp = dqp->q_gdquot)) { - xfs_dqlock(gdqp); - dqp->q_gdquot = NULL; - } - xfs_dqunlock(dqp); - - if (gdqp) { - /* - * Can't hold the mplist lock across a dqput. - * XXXmust convert to marker based iterations here. - */ - nrecl = q->qi_dqreclaims; - mutex_unlock(&q->qi_dqlist_lock); - xfs_qm_dqput(gdqp); - - mutex_lock(&q->qi_dqlist_lock); - if (nrecl != q->qi_dqreclaims) - goto again; - } - } -} - -/* - * Go through all the incore dquots of this file system and take them - * off the mplist and hashlist, if the dquot type matches the dqtype - * parameter. This is used when turning off quota accounting for - * users and/or groups, as well as when the filesystem is unmounting. - */ -STATIC int -xfs_qm_dqpurge_int( - struct xfs_mount *mp, - uint flags) -{ - struct xfs_quotainfo *q = mp->m_quotainfo; - struct xfs_dquot *dqp, *n; - uint dqtype; - int nrecl; - int nmisses; - - if (!q) - return 0; - - dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0; - dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0; - dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0; - - mutex_lock(&q->qi_dqlist_lock); - - /* - * In the first pass through all incore dquots of this filesystem, - * we release the group dquot pointers the user dquots may be - * carrying around as a hint. We need to do this irrespective of - * what's being turned off. - */ - xfs_qm_detach_gdquots(mp); - - again: - nmisses = 0; - ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); - /* - * Try to get rid of all of the unwanted dquots. The idea is to - * get them off mplist and hashlist, but leave them on freelist. - */ - list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) { - /* - * It's OK to look at the type without taking dqlock here. - * We're holding the mplist lock here, and that's needed for - * a dqreclaim. - */ - if ((dqp->dq_flags & dqtype) == 0) - continue; - - if (!mutex_trylock(&dqp->q_hash->qh_lock)) { - nrecl = q->qi_dqreclaims; - mutex_unlock(&q->qi_dqlist_lock); - mutex_lock(&dqp->q_hash->qh_lock); - mutex_lock(&q->qi_dqlist_lock); - - /* - * XXXTheoretically, we can get into a very long - * ping pong game here. - * No one can be adding dquots to the mplist at - * this point, but somebody might be taking things off. - */ - if (nrecl != q->qi_dqreclaims) { - mutex_unlock(&dqp->q_hash->qh_lock); - goto again; - } - } - - /* - * Take the dquot off the mplist and hashlist. It may remain on - * freelist in INACTIVE state. - */ - nmisses += xfs_qm_dqpurge(dqp); - } - mutex_unlock(&q->qi_dqlist_lock); - return nmisses; -} - -int -xfs_qm_dqpurge_all( - xfs_mount_t *mp, - uint flags) -{ - int ndquots; - - /* - * Purge the dquot cache. - * None of the dquots should really be busy at this point. - */ - if (mp->m_quotainfo) { - while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) { - delay(ndquots * 10); - } - } - return 0; -} - -STATIC int -xfs_qm_dqattach_one( - xfs_inode_t *ip, - xfs_dqid_t id, - uint type, - uint doalloc, - xfs_dquot_t *udqhint, /* hint */ - xfs_dquot_t **IO_idqpp) -{ - xfs_dquot_t *dqp; - int error; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - error = 0; - - /* - * See if we already have it in the inode itself. IO_idqpp is - * &i_udquot or &i_gdquot. This made the code look weird, but - * made the logic a lot simpler. - */ - dqp = *IO_idqpp; - if (dqp) { - trace_xfs_dqattach_found(dqp); - return 0; - } - - /* - * udqhint is the i_udquot field in inode, and is non-NULL only - * when the type arg is group/project. Its purpose is to save a - * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside - * the user dquot. - */ - if (udqhint) { - ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); - xfs_dqlock(udqhint); - - /* - * No need to take dqlock to look at the id. - * - * The ID can't change until it gets reclaimed, and it won't - * be reclaimed as long as we have a ref from inode and we - * hold the ilock. - */ - dqp = udqhint->q_gdquot; - if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) { - xfs_dqlock(dqp); - XFS_DQHOLD(dqp); - ASSERT(*IO_idqpp == NULL); - *IO_idqpp = dqp; - - xfs_dqunlock(dqp); - xfs_dqunlock(udqhint); - return 0; - } - - /* - * We can't hold a dquot lock when we call the dqget code. - * We'll deadlock in no time, because of (not conforming to) - * lock ordering - the inodelock comes before any dquot lock, - * and we may drop and reacquire the ilock in xfs_qm_dqget(). - */ - xfs_dqunlock(udqhint); - } - - /* - * Find the dquot from somewhere. This bumps the - * reference count of dquot and returns it locked. - * This can return ENOENT if dquot didn't exist on - * disk and we didn't ask it to allocate; - * ESRCH if quotas got turned off suddenly. - */ - error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp); - if (error) - return error; - - trace_xfs_dqattach_get(dqp); - - /* - * dqget may have dropped and re-acquired the ilock, but it guarantees - * that the dquot returned is the one that should go in the inode. - */ - *IO_idqpp = dqp; - xfs_dqunlock(dqp); - return 0; -} - - -/* - * Given a udquot and gdquot, attach a ptr to the group dquot in the - * udquot as a hint for future lookups. The idea sounds simple, but the - * execution isn't, because the udquot might have a group dquot attached - * already and getting rid of that gets us into lock ordering constraints. - * The process is complicated more by the fact that the dquots may or may not - * be locked on entry. - */ -STATIC void -xfs_qm_dqattach_grouphint( - xfs_dquot_t *udq, - xfs_dquot_t *gdq) -{ - xfs_dquot_t *tmp; - - xfs_dqlock(udq); - - if ((tmp = udq->q_gdquot)) { - if (tmp == gdq) { - xfs_dqunlock(udq); - return; - } - - udq->q_gdquot = NULL; - /* - * We can't keep any dqlocks when calling dqrele, - * because the freelist lock comes before dqlocks. - */ - xfs_dqunlock(udq); - /* - * we took a hard reference once upon a time in dqget, - * so give it back when the udquot no longer points at it - * dqput() does the unlocking of the dquot. - */ - xfs_qm_dqrele(tmp); - - xfs_dqlock(udq); - xfs_dqlock(gdq); - - } else { - ASSERT(XFS_DQ_IS_LOCKED(udq)); - xfs_dqlock(gdq); - } - - ASSERT(XFS_DQ_IS_LOCKED(udq)); - ASSERT(XFS_DQ_IS_LOCKED(gdq)); - /* - * Somebody could have attached a gdquot here, - * when we dropped the uqlock. If so, just do nothing. - */ - if (udq->q_gdquot == NULL) { - XFS_DQHOLD(gdq); - udq->q_gdquot = gdq; - } - - xfs_dqunlock(gdq); - xfs_dqunlock(udq); -} - - -/* - * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON - * into account. - * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed. - * Inode may get unlocked and relocked in here, and the caller must deal with - * the consequences. - */ -int -xfs_qm_dqattach_locked( - xfs_inode_t *ip, - uint flags) -{ - xfs_mount_t *mp = ip->i_mount; - uint nquotas = 0; - int error = 0; - - if (!XFS_IS_QUOTA_RUNNING(mp) || - !XFS_IS_QUOTA_ON(mp) || - !XFS_NOT_DQATTACHED(mp, ip) || - ip->i_ino == mp->m_sb.sb_uquotino || - ip->i_ino == mp->m_sb.sb_gquotino) - return 0; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - - if (XFS_IS_UQUOTA_ON(mp)) { - error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, - flags & XFS_QMOPT_DQALLOC, - NULL, &ip->i_udquot); - if (error) - goto done; - nquotas++; - } - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - if (XFS_IS_OQUOTA_ON(mp)) { - error = XFS_IS_GQUOTA_ON(mp) ? - xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, - flags & XFS_QMOPT_DQALLOC, - ip->i_udquot, &ip->i_gdquot) : - xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, - flags & XFS_QMOPT_DQALLOC, - ip->i_udquot, &ip->i_gdquot); - /* - * Don't worry about the udquot that we may have - * attached above. It'll get detached, if not already. - */ - if (error) - goto done; - nquotas++; - } - - /* - * Attach this group quota to the user quota as a hint. - * This WON'T, in general, result in a thrash. - */ - if (nquotas == 2) { - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(ip->i_udquot); - ASSERT(ip->i_gdquot); - - /* - * We may or may not have the i_udquot locked at this point, - * but this check is OK since we don't depend on the i_gdquot to - * be accurate 100% all the time. It is just a hint, and this - * will succeed in general. - */ - if (ip->i_udquot->q_gdquot == ip->i_gdquot) - goto done; - /* - * Attach i_gdquot to the gdquot hint inside the i_udquot. - */ - xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); - } - - done: -#ifdef DEBUG - if (!error) { - if (XFS_IS_UQUOTA_ON(mp)) - ASSERT(ip->i_udquot); - if (XFS_IS_OQUOTA_ON(mp)) - ASSERT(ip->i_gdquot); - } - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); -#endif - return error; -} - -int -xfs_qm_dqattach( - struct xfs_inode *ip, - uint flags) -{ - int error; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - error = xfs_qm_dqattach_locked(ip, flags); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - return error; -} - -/* - * Release dquots (and their references) if any. - * The inode should be locked EXCL except when this's called by - * xfs_ireclaim. - */ -void -xfs_qm_dqdetach( - xfs_inode_t *ip) -{ - if (!(ip->i_udquot || ip->i_gdquot)) - return; - - trace_xfs_dquot_dqdetach(ip); - - ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino); - ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino); - if (ip->i_udquot) { - xfs_qm_dqrele(ip->i_udquot); - ip->i_udquot = NULL; - } - if (ip->i_gdquot) { - xfs_qm_dqrele(ip->i_gdquot); - ip->i_gdquot = NULL; - } -} - -int -xfs_qm_sync( - struct xfs_mount *mp, - int flags) -{ - struct xfs_quotainfo *q = mp->m_quotainfo; - int recl, restarts; - struct xfs_dquot *dqp; - int error; - - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) - return 0; - - restarts = 0; - - again: - mutex_lock(&q->qi_dqlist_lock); - /* - * dqpurge_all() also takes the mplist lock and iterate thru all dquots - * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared - * when we have the mplist lock, we know that dquots will be consistent - * as long as we have it locked. - */ - if (!XFS_IS_QUOTA_ON(mp)) { - mutex_unlock(&q->qi_dqlist_lock); - return 0; - } - ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); - list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { - /* - * If this is vfs_sync calling, then skip the dquots that - * don't 'seem' to be dirty. ie. don't acquire dqlock. - * This is very similar to what xfs_sync does with inodes. - */ - if (flags & SYNC_TRYLOCK) { - if (!XFS_DQ_IS_DIRTY(dqp)) - continue; - if (!xfs_qm_dqlock_nowait(dqp)) - continue; - } else { - xfs_dqlock(dqp); - } - - /* - * Now, find out for sure if this dquot is dirty or not. - */ - if (! XFS_DQ_IS_DIRTY(dqp)) { - xfs_dqunlock(dqp); - continue; - } - - /* XXX a sentinel would be better */ - recl = q->qi_dqreclaims; - if (!xfs_dqflock_nowait(dqp)) { - if (flags & SYNC_TRYLOCK) { - xfs_dqunlock(dqp); - continue; - } - /* - * If we can't grab the flush lock then if the caller - * really wanted us to give this our best shot, so - * see if we can give a push to the buffer before we wait - * on the flush lock. At this point, we know that - * even though the dquot is being flushed, - * it has (new) dirty data. - */ - xfs_qm_dqflock_pushbuf_wait(dqp); - } - /* - * Let go of the mplist lock. We don't want to hold it - * across a disk write - */ - mutex_unlock(&q->qi_dqlist_lock); - error = xfs_qm_dqflush(dqp, flags); - xfs_dqunlock(dqp); - if (error && XFS_FORCED_SHUTDOWN(mp)) - return 0; /* Need to prevent umount failure */ - else if (error) - return error; - - mutex_lock(&q->qi_dqlist_lock); - if (recl != q->qi_dqreclaims) { - if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS) - break; - - mutex_unlock(&q->qi_dqlist_lock); - goto again; - } - } - - mutex_unlock(&q->qi_dqlist_lock); - return 0; -} - -/* - * The hash chains and the mplist use the same xfs_dqhash structure as - * their list head, but we can take the mplist qh_lock and one of the - * hash qh_locks at the same time without any problem as they aren't - * related. - */ -static struct lock_class_key xfs_quota_mplist_class; - -/* - * This initializes all the quota information that's kept in the - * mount structure - */ -STATIC int -xfs_qm_init_quotainfo( - xfs_mount_t *mp) -{ - xfs_quotainfo_t *qinf; - int error; - xfs_dquot_t *dqp; - - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - - /* - * Tell XQM that we exist as soon as possible. - */ - if ((error = xfs_qm_hold_quotafs_ref(mp))) { - return error; - } - - qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); - - /* - * See if quotainodes are setup, and if not, allocate them, - * and change the superblock accordingly. - */ - if ((error = xfs_qm_init_quotainos(mp))) { - kmem_free(qinf); - mp->m_quotainfo = NULL; - return error; - } - - INIT_LIST_HEAD(&qinf->qi_dqlist); - mutex_init(&qinf->qi_dqlist_lock); - lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class); - - qinf->qi_dqreclaims = 0; - - /* mutex used to serialize quotaoffs */ - mutex_init(&qinf->qi_quotaofflock); - - /* Precalc some constants */ - qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); - ASSERT(qinf->qi_dqchunklen); - qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen); - do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t)); - - mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD); - - /* - * We try to get the limits from the superuser's limits fields. - * This is quite hacky, but it is standard quota practice. - * We look at the USR dquot with id == 0 first, but if user quotas - * are not enabled we goto the GRP dquot with id == 0. - * We don't really care to keep separate default limits for user - * and group quotas, at least not at this point. - */ - error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0, - XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : - (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : - XFS_DQ_PROJ), - XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN, - &dqp); - if (! error) { - xfs_disk_dquot_t *ddqp = &dqp->q_core; - - /* - * The warnings and timers set the grace period given to - * a user or group before he or she can not perform any - * more writing. If it is zero, a default is used. - */ - qinf->qi_btimelimit = ddqp->d_btimer ? - be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT; - qinf->qi_itimelimit = ddqp->d_itimer ? - be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT; - qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ? - be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT; - qinf->qi_bwarnlimit = ddqp->d_bwarns ? - be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT; - qinf->qi_iwarnlimit = ddqp->d_iwarns ? - be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT; - qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ? - be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT; - qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit); - qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit); - qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit); - qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit); - qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); - qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); - - /* - * We sent the XFS_QMOPT_DQSUSER flag to dqget because - * we don't want this dquot cached. We haven't done a - * quotacheck yet, and quotacheck doesn't like incore dquots. - */ - xfs_qm_dqdestroy(dqp); - } else { - qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; - qinf->qi_itimelimit = XFS_QM_ITIMELIMIT; - qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT; - qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT; - qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT; - qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; - } - - return 0; -} - - -/* - * Gets called when unmounting a filesystem or when all quotas get - * turned off. - * This purges the quota inodes, destroys locks and frees itself. - */ -void -xfs_qm_destroy_quotainfo( - xfs_mount_t *mp) -{ - xfs_quotainfo_t *qi; - - qi = mp->m_quotainfo; - ASSERT(qi != NULL); - ASSERT(xfs_Gqm != NULL); - - /* - * Release the reference that XQM kept, so that we know - * when the XQM structure should be freed. We cannot assume - * that xfs_Gqm is non-null after this point. - */ - xfs_qm_rele_quotafs_ref(mp); - - ASSERT(list_empty(&qi->qi_dqlist)); - mutex_destroy(&qi->qi_dqlist_lock); - - if (qi->qi_uquotaip) { - IRELE(qi->qi_uquotaip); - qi->qi_uquotaip = NULL; /* paranoia */ - } - if (qi->qi_gquotaip) { - IRELE(qi->qi_gquotaip); - qi->qi_gquotaip = NULL; - } - mutex_destroy(&qi->qi_quotaofflock); - kmem_free(qi); - mp->m_quotainfo = NULL; -} - - - -/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */ - -/* ARGSUSED */ -STATIC void -xfs_qm_list_init( - xfs_dqlist_t *list, - char *str, - int n) -{ - mutex_init(&list->qh_lock); - INIT_LIST_HEAD(&list->qh_list); - list->qh_version = 0; - list->qh_nelems = 0; -} - -STATIC void -xfs_qm_list_destroy( - xfs_dqlist_t *list) -{ - mutex_destroy(&(list->qh_lock)); -} - -/* - * Create an inode and return with a reference already taken, but unlocked - * This is how we create quota inodes - */ -STATIC int -xfs_qm_qino_alloc( - xfs_mount_t *mp, - xfs_inode_t **ip, - __int64_t sbfields, - uint flags) -{ - xfs_trans_t *tp; - int error; - int committed; - - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE); - if ((error = xfs_trans_reserve(tp, - XFS_QM_QINOCREATE_SPACE_RES(mp), - XFS_CREATE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, - XFS_CREATE_LOG_COUNT))) { - xfs_trans_cancel(tp, 0); - return error; - } - - error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed); - if (error) { - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | - XFS_TRANS_ABORT); - return error; - } - - /* - * Make the changes in the superblock, and log those too. - * sbfields arg may contain fields other than *QUOTINO; - * VERSIONNUM for example. - */ - spin_lock(&mp->m_sb_lock); - if (flags & XFS_QMOPT_SBVERSION) { - ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); - ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | - XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) == - (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | - XFS_SB_GQUOTINO | XFS_SB_QFLAGS)); - - xfs_sb_version_addquota(&mp->m_sb); - mp->m_sb.sb_uquotino = NULLFSINO; - mp->m_sb.sb_gquotino = NULLFSINO; - - /* qflags will get updated _after_ quotacheck */ - mp->m_sb.sb_qflags = 0; - } - if (flags & XFS_QMOPT_UQUOTA) - mp->m_sb.sb_uquotino = (*ip)->i_ino; - else - mp->m_sb.sb_gquotino = (*ip)->i_ino; - spin_unlock(&mp->m_sb_lock); - xfs_mod_sb(tp, sbfields); - - if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { - xfs_alert(mp, "%s failed (error %d)!", __func__, error); - return error; - } - return 0; -} - - -STATIC void -xfs_qm_reset_dqcounts( - xfs_mount_t *mp, - xfs_buf_t *bp, - xfs_dqid_t id, - uint type) -{ - xfs_disk_dquot_t *ddq; - int j; - - trace_xfs_reset_dqcounts(bp, _RET_IP_); - - /* - * Reset all counters and timers. They'll be - * started afresh by xfs_qm_quotacheck. - */ -#ifdef DEBUG - j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); - do_div(j, sizeof(xfs_dqblk_t)); - ASSERT(mp->m_quotainfo->qi_dqperchunk == j); -#endif - ddq = bp->b_addr; - for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) { - /* - * Do a sanity check, and if needed, repair the dqblk. Don't - * output any warnings because it's perfectly possible to - * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. - */ - (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, - "xfs_quotacheck"); - ddq->d_bcount = 0; - ddq->d_icount = 0; - ddq->d_rtbcount = 0; - ddq->d_btimer = 0; - ddq->d_itimer = 0; - ddq->d_rtbtimer = 0; - ddq->d_bwarns = 0; - ddq->d_iwarns = 0; - ddq->d_rtbwarns = 0; - ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1); - } -} - -STATIC int -xfs_qm_dqiter_bufs( - xfs_mount_t *mp, - xfs_dqid_t firstid, - xfs_fsblock_t bno, - xfs_filblks_t blkcnt, - uint flags) -{ - xfs_buf_t *bp; - int error; - int type; - - ASSERT(blkcnt > 0); - type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : - (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); - error = 0; - - /* - * Blkcnt arg can be a very big number, and might even be - * larger than the log itself. So, we have to break it up into - * manageable-sized transactions. - * Note that we don't start a permanent transaction here; we might - * not be able to get a log reservation for the whole thing up front, - * and we don't really care to either, because we just discard - * everything if we were to crash in the middle of this loop. - */ - while (blkcnt--) { - error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, - XFS_FSB_TO_DADDR(mp, bno), - mp->m_quotainfo->qi_dqchunklen, 0, &bp); - if (error) - break; - - xfs_qm_reset_dqcounts(mp, bp, firstid, type); - xfs_bdwrite(mp, bp); - /* - * goto the next block. - */ - bno++; - firstid += mp->m_quotainfo->qi_dqperchunk; - } - return error; -} - -/* - * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a - * caller supplied function for every chunk of dquots that we find. - */ -STATIC int -xfs_qm_dqiterate( - xfs_mount_t *mp, - xfs_inode_t *qip, - uint flags) -{ - xfs_bmbt_irec_t *map; - int i, nmaps; /* number of map entries */ - int error; /* return value */ - xfs_fileoff_t lblkno; - xfs_filblks_t maxlblkcnt; - xfs_dqid_t firstid; - xfs_fsblock_t rablkno; - xfs_filblks_t rablkcnt; - - error = 0; - /* - * This looks racy, but we can't keep an inode lock across a - * trans_reserve. But, this gets called during quotacheck, and that - * happens only at mount time which is single threaded. - */ - if (qip->i_d.di_nblocks == 0) - return 0; - - map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP); - - lblkno = 0; - maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); - do { - nmaps = XFS_DQITER_MAP_SIZE; - /* - * We aren't changing the inode itself. Just changing - * some of its data. No new blocks are added here, and - * the inode is never added to the transaction. - */ - xfs_ilock(qip, XFS_ILOCK_SHARED); - error = xfs_bmapi(NULL, qip, lblkno, - maxlblkcnt - lblkno, - XFS_BMAPI_METADATA, - NULL, - 0, map, &nmaps, NULL); - xfs_iunlock(qip, XFS_ILOCK_SHARED); - if (error) - break; - - ASSERT(nmaps <= XFS_DQITER_MAP_SIZE); - for (i = 0; i < nmaps; i++) { - ASSERT(map[i].br_startblock != DELAYSTARTBLOCK); - ASSERT(map[i].br_blockcount); - - - lblkno += map[i].br_blockcount; - - if (map[i].br_startblock == HOLESTARTBLOCK) - continue; - - firstid = (xfs_dqid_t) map[i].br_startoff * - mp->m_quotainfo->qi_dqperchunk; - /* - * Do a read-ahead on the next extent. - */ - if ((i+1 < nmaps) && - (map[i+1].br_startblock != HOLESTARTBLOCK)) { - rablkcnt = map[i+1].br_blockcount; - rablkno = map[i+1].br_startblock; - while (rablkcnt--) { - xfs_buf_readahead(mp->m_ddev_targp, - XFS_FSB_TO_DADDR(mp, rablkno), - mp->m_quotainfo->qi_dqchunklen); - rablkno++; - } - } - /* - * Iterate thru all the blks in the extent and - * reset the counters of all the dquots inside them. - */ - if ((error = xfs_qm_dqiter_bufs(mp, - firstid, - map[i].br_startblock, - map[i].br_blockcount, - flags))) { - break; - } - } - - if (error) - break; - } while (nmaps > 0); - - kmem_free(map); - - return error; -} - -/* - * Called by dqusage_adjust in doing a quotacheck. - * - * Given the inode, and a dquot id this updates both the incore dqout as well - * as the buffer copy. This is so that once the quotacheck is done, we can - * just log all the buffers, as opposed to logging numerous updates to - * individual dquots. - */ -STATIC int -xfs_qm_quotacheck_dqadjust( - struct xfs_inode *ip, - xfs_dqid_t id, - uint type, - xfs_qcnt_t nblks, - xfs_qcnt_t rtblks) -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_dquot *dqp; - int error; - - error = xfs_qm_dqget(mp, ip, id, type, - XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp); - if (error) { - /* - * Shouldn't be able to turn off quotas here. - */ - ASSERT(error != ESRCH); - ASSERT(error != ENOENT); - return error; - } - - trace_xfs_dqadjust(dqp); - - /* - * Adjust the inode count and the block count to reflect this inode's - * resource usage. - */ - be64_add_cpu(&dqp->q_core.d_icount, 1); - dqp->q_res_icount++; - if (nblks) { - be64_add_cpu(&dqp->q_core.d_bcount, nblks); - dqp->q_res_bcount += nblks; - } - if (rtblks) { - be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks); - dqp->q_res_rtbcount += rtblks; - } - - /* - * Set default limits, adjust timers (since we changed usages) - * - * There are no timers for the default values set in the root dquot. - */ - if (dqp->q_core.d_id) { - xfs_qm_adjust_dqlimits(mp, &dqp->q_core); - xfs_qm_adjust_dqtimers(mp, &dqp->q_core); - } - - dqp->dq_flags |= XFS_DQ_DIRTY; - xfs_qm_dqput(dqp); - return 0; -} - -STATIC int -xfs_qm_get_rtblks( - xfs_inode_t *ip, - xfs_qcnt_t *O_rtblks) -{ - xfs_filblks_t rtblks; /* total rt blks */ - xfs_extnum_t idx; /* extent record index */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_extnum_t nextents; /* number of extent entries */ - int error; - - ASSERT(XFS_IS_REALTIME_INODE(ip)); - ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - if (!(ifp->if_flags & XFS_IFEXTENTS)) { - if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK))) - return error; - } - rtblks = 0; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - for (idx = 0; idx < nextents; idx++) - rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx)); - *O_rtblks = (xfs_qcnt_t)rtblks; - return 0; -} - -/* - * callback routine supplied to bulkstat(). Given an inumber, find its - * dquots and update them to account for resources taken by that inode. - */ -/* ARGSUSED */ -STATIC int -xfs_qm_dqusage_adjust( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode number to get data for */ - void __user *buffer, /* not used */ - int ubsize, /* not used */ - int *ubused, /* not used */ - int *res) /* result code value */ -{ - xfs_inode_t *ip; - xfs_qcnt_t nblks, rtblks = 0; - int error; - - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - - /* - * rootino must have its resources accounted for, not so with the quota - * inodes. - */ - if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { - *res = BULKSTAT_RV_NOTHING; - return XFS_ERROR(EINVAL); - } - - /* - * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget - * interface expects the inode to be exclusively locked because that's - * the case in all other instances. It's OK that we do this because - * quotacheck is done only at mount time. - */ - error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); - if (error) { - *res = BULKSTAT_RV_NOTHING; - return error; - } - - ASSERT(ip->i_delayed_blks == 0); - - if (XFS_IS_REALTIME_INODE(ip)) { - /* - * Walk thru the extent list and count the realtime blocks. - */ - error = xfs_qm_get_rtblks(ip, &rtblks); - if (error) - goto error0; - } - - nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks; - - /* - * Add the (disk blocks and inode) resources occupied by this - * inode to its dquots. We do this adjustment in the incore dquot, - * and also copy the changes to its buffer. - * We don't care about putting these changes in a transaction - * envelope because if we crash in the middle of a 'quotacheck' - * we have to start from the beginning anyway. - * Once we're done, we'll log all the dquot bufs. - * - * The *QUOTA_ON checks below may look pretty racy, but quotachecks - * and quotaoffs don't race. (Quotachecks happen at mount time only). - */ - if (XFS_IS_UQUOTA_ON(mp)) { - error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid, - XFS_DQ_USER, nblks, rtblks); - if (error) - goto error0; - } - - if (XFS_IS_GQUOTA_ON(mp)) { - error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid, - XFS_DQ_GROUP, nblks, rtblks); - if (error) - goto error0; - } - - if (XFS_IS_PQUOTA_ON(mp)) { - error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip), - XFS_DQ_PROJ, nblks, rtblks); - if (error) - goto error0; - } - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - IRELE(ip); - *res = BULKSTAT_RV_DIDONE; - return 0; - -error0: - xfs_iunlock(ip, XFS_ILOCK_EXCL); - IRELE(ip); - *res = BULKSTAT_RV_GIVEUP; - return error; -} - -/* - * Walk thru all the filesystem inodes and construct a consistent view - * of the disk quota world. If the quotacheck fails, disable quotas. - */ -int -xfs_qm_quotacheck( - xfs_mount_t *mp) -{ - int done, count, error; - xfs_ino_t lastino; - size_t structsz; - xfs_inode_t *uip, *gip; - uint flags; - - count = INT_MAX; - structsz = 1; - lastino = 0; - flags = 0; - - ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip); - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - - /* - * There should be no cached dquots. The (simplistic) quotacheck - * algorithm doesn't like that. - */ - ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist)); - - xfs_notice(mp, "Quotacheck needed: Please wait."); - - /* - * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset - * their counters to zero. We need a clean slate. - * We don't log our changes till later. - */ - uip = mp->m_quotainfo->qi_uquotaip; - if (uip) { - error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA); - if (error) - goto error_return; - flags |= XFS_UQUOTA_CHKD; - } - - gip = mp->m_quotainfo->qi_gquotaip; - if (gip) { - error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? - XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); - if (error) - goto error_return; - flags |= XFS_OQUOTA_CHKD; - } - - do { - /* - * Iterate thru all the inodes in the file system, - * adjusting the corresponding dquot counters in core. - */ - error = xfs_bulkstat(mp, &lastino, &count, - xfs_qm_dqusage_adjust, - structsz, NULL, &done); - if (error) - break; - - } while (!done); - - /* - * We've made all the changes that we need to make incore. - * Flush them down to disk buffers if everything was updated - * successfully. - */ - if (!error) - error = xfs_qm_dqflush_all(mp, 0); - - /* - * We can get this error if we couldn't do a dquot allocation inside - * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the - * dirty dquots that might be cached, we just want to get rid of them - * and turn quotaoff. The dquots won't be attached to any of the inodes - * at this point (because we intentionally didn't in dqget_noattach). - */ - if (error) { - xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL); - goto error_return; - } - - /* - * We didn't log anything, because if we crashed, we'll have to - * start the quotacheck from scratch anyway. However, we must make - * sure that our dquot changes are secure before we put the - * quotacheck'd stamp on the superblock. So, here we do a synchronous - * flush. - */ - XFS_bflush(mp->m_ddev_targp); - - /* - * If one type of quotas is off, then it will lose its - * quotachecked status, since we won't be doing accounting for - * that type anymore. - */ - mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); - mp->m_qflags |= flags; - - error_return: - if (error) { - xfs_warn(mp, - "Quotacheck: Unsuccessful (Error %d): Disabling quotas.", - error); - /* - * We must turn off quotas. - */ - ASSERT(mp->m_quotainfo != NULL); - ASSERT(xfs_Gqm != NULL); - xfs_qm_destroy_quotainfo(mp); - if (xfs_mount_reset_sbqflags(mp)) { - xfs_warn(mp, - "Quotacheck: Failed to reset quota flags."); - } - } else - xfs_notice(mp, "Quotacheck: Done."); - return (error); -} - -/* - * This is called after the superblock has been read in and we're ready to - * iget the quota inodes. - */ -STATIC int -xfs_qm_init_quotainos( - xfs_mount_t *mp) -{ - xfs_inode_t *uip, *gip; - int error; - __int64_t sbflags; - uint flags; - - ASSERT(mp->m_quotainfo); - uip = gip = NULL; - sbflags = 0; - flags = 0; - - /* - * Get the uquota and gquota inodes - */ - if (xfs_sb_version_hasquota(&mp->m_sb)) { - if (XFS_IS_UQUOTA_ON(mp) && - mp->m_sb.sb_uquotino != NULLFSINO) { - ASSERT(mp->m_sb.sb_uquotino > 0); - if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, - 0, 0, &uip))) - return XFS_ERROR(error); - } - if (XFS_IS_OQUOTA_ON(mp) && - mp->m_sb.sb_gquotino != NULLFSINO) { - ASSERT(mp->m_sb.sb_gquotino > 0); - if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, - 0, 0, &gip))) { - if (uip) - IRELE(uip); - return XFS_ERROR(error); - } - } - } else { - flags |= XFS_QMOPT_SBVERSION; - sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | - XFS_SB_GQUOTINO | XFS_SB_QFLAGS); - } - - /* - * Create the two inodes, if they don't exist already. The changes - * made above will get added to a transaction and logged in one of - * the qino_alloc calls below. If the device is readonly, - * temporarily switch to read-write to do this. - */ - if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) { - if ((error = xfs_qm_qino_alloc(mp, &uip, - sbflags | XFS_SB_UQUOTINO, - flags | XFS_QMOPT_UQUOTA))) - return XFS_ERROR(error); - - flags &= ~XFS_QMOPT_SBVERSION; - } - if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) { - flags |= (XFS_IS_GQUOTA_ON(mp) ? - XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); - error = xfs_qm_qino_alloc(mp, &gip, - sbflags | XFS_SB_GQUOTINO, flags); - if (error) { - if (uip) - IRELE(uip); - - return XFS_ERROR(error); - } - } - - mp->m_quotainfo->qi_uquotaip = uip; - mp->m_quotainfo->qi_gquotaip = gip; - - return 0; -} - - - -/* - * Just pop the least recently used dquot off the freelist and - * recycle it. The returned dquot is locked. - */ -STATIC xfs_dquot_t * -xfs_qm_dqreclaim_one(void) -{ - xfs_dquot_t *dqpout; - xfs_dquot_t *dqp; - int restarts; - int startagain; - - restarts = 0; - dqpout = NULL; - - /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ -again: - startagain = 0; - mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); - - list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { - struct xfs_mount *mp = dqp->q_mount; - xfs_dqlock(dqp); - - /* - * We are racing with dqlookup here. Naturally we don't - * want to reclaim a dquot that lookup wants. We release the - * freelist lock and start over, so that lookup will grab - * both the dquot and the freelistlock. - */ - if (dqp->dq_flags & XFS_DQ_WANT) { - ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); - - trace_xfs_dqreclaim_want(dqp); - XQM_STATS_INC(xqmstats.xs_qm_dqwants); - restarts++; - startagain = 1; - goto dqunlock; - } - - /* - * If the dquot is inactive, we are assured that it is - * not on the mplist or the hashlist, and that makes our - * life easier. - */ - if (dqp->dq_flags & XFS_DQ_INACTIVE) { - ASSERT(mp == NULL); - ASSERT(! XFS_DQ_IS_DIRTY(dqp)); - ASSERT(list_empty(&dqp->q_hashlist)); - ASSERT(list_empty(&dqp->q_mplist)); - list_del_init(&dqp->q_freelist); - xfs_Gqm->qm_dqfrlist_cnt--; - dqpout = dqp; - XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); - goto dqunlock; - } - - ASSERT(dqp->q_hash); - ASSERT(!list_empty(&dqp->q_mplist)); - - /* - * Try to grab the flush lock. If this dquot is in the process - * of getting flushed to disk, we don't want to reclaim it. - */ - if (!xfs_dqflock_nowait(dqp)) - goto dqunlock; - - /* - * We have the flush lock so we know that this is not in the - * process of being flushed. So, if this is dirty, flush it - * DELWRI so that we don't get a freelist infested with - * dirty dquots. - */ - if (XFS_DQ_IS_DIRTY(dqp)) { - int error; - - trace_xfs_dqreclaim_dirty(dqp); - - /* - * We flush it delayed write, so don't bother - * releasing the freelist lock. - */ - error = xfs_qm_dqflush(dqp, 0); - if (error) { - xfs_warn(mp, "%s: dquot %p flush failed", - __func__, dqp); - } - goto dqunlock; - } - - /* - * We're trying to get the hashlock out of order. This races - * with dqlookup; so, we giveup and goto the next dquot if - * we couldn't get the hashlock. This way, we won't starve - * a dqlookup process that holds the hashlock that is - * waiting for the freelist lock. - */ - if (!mutex_trylock(&dqp->q_hash->qh_lock)) { - restarts++; - goto dqfunlock; - } - - /* - * This races with dquot allocation code as well as dqflush_all - * and reclaim code. So, if we failed to grab the mplist lock, - * giveup everything and start over. - */ - if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { - restarts++; - startagain = 1; - goto qhunlock; - } - - ASSERT(dqp->q_nrefs == 0); - list_del_init(&dqp->q_mplist); - mp->m_quotainfo->qi_dquots--; - mp->m_quotainfo->qi_dqreclaims++; - list_del_init(&dqp->q_hashlist); - dqp->q_hash->qh_version++; - list_del_init(&dqp->q_freelist); - xfs_Gqm->qm_dqfrlist_cnt--; - dqpout = dqp; - mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); -qhunlock: - mutex_unlock(&dqp->q_hash->qh_lock); -dqfunlock: - xfs_dqfunlock(dqp); -dqunlock: - xfs_dqunlock(dqp); - if (dqpout) - break; - if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) - break; - if (startagain) { - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - goto again; - } - } - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - return dqpout; -} - -/* - * Traverse the freelist of dquots and attempt to reclaim a maximum of - * 'howmany' dquots. This operation races with dqlookup(), and attempts to - * favor the lookup function ... - */ -STATIC int -xfs_qm_shake_freelist( - int howmany) -{ - int nreclaimed = 0; - xfs_dquot_t *dqp; - - if (howmany <= 0) - return 0; - - while (nreclaimed < howmany) { - dqp = xfs_qm_dqreclaim_one(); - if (!dqp) - return nreclaimed; - xfs_qm_dqdestroy(dqp); - nreclaimed++; - } - return nreclaimed; -} - -/* - * The kmem_shake interface is invoked when memory is running low. - */ -/* ARGSUSED */ -STATIC int -xfs_qm_shake( - struct shrinker *shrink, - struct shrink_control *sc) -{ - int ndqused, nfree, n; - gfp_t gfp_mask = sc->gfp_mask; - - if (!kmem_shake_allow(gfp_mask)) - return 0; - if (!xfs_Gqm) - return 0; - - nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */ - /* incore dquots in all f/s's */ - ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree; - - ASSERT(ndqused >= 0); - - if (nfree <= ndqused && nfree < ndquot) - return 0; - - ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */ - n = nfree - ndqused - ndquot; /* # over target */ - - return xfs_qm_shake_freelist(MAX(nfree, n)); -} - - -/*------------------------------------------------------------------*/ - -/* - * Return a new incore dquot. Depending on the number of - * dquots in the system, we either allocate a new one on the kernel heap, - * or reclaim a free one. - * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed - * to reclaim an existing one from the freelist. - */ -boolean_t -xfs_qm_dqalloc_incore( - xfs_dquot_t **O_dqpp) -{ - xfs_dquot_t *dqp; - - /* - * Check against high water mark to see if we want to pop - * a nincompoop dquot off the freelist. - */ - if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) { - /* - * Try to recycle a dquot from the freelist. - */ - if ((dqp = xfs_qm_dqreclaim_one())) { - XQM_STATS_INC(xqmstats.xs_qm_dqreclaims); - /* - * Just zero the core here. The rest will get - * reinitialized by caller. XXX we shouldn't even - * do this zero ... - */ - memset(&dqp->q_core, 0, sizeof(dqp->q_core)); - *O_dqpp = dqp; - return B_FALSE; - } - XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses); - } - - /* - * Allocate a brand new dquot on the kernel heap and return it - * to the caller to initialize. - */ - ASSERT(xfs_Gqm->qm_dqzone != NULL); - *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); - atomic_inc(&xfs_Gqm->qm_totaldquots); - - return B_TRUE; -} - - -/* - * Start a transaction and write the incore superblock changes to - * disk. flags parameter indicates which fields have changed. - */ -int -xfs_qm_write_sb_changes( - xfs_mount_t *mp, - __int64_t flags) -{ - xfs_trans_t *tp; - int error; - - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); - if ((error = xfs_trans_reserve(tp, 0, - mp->m_sb.sb_sectsize + 128, 0, - 0, - XFS_DEFAULT_LOG_COUNT))) { - xfs_trans_cancel(tp, 0); - return error; - } - - xfs_mod_sb(tp, flags); - error = xfs_trans_commit(tp, 0); - - return error; -} - - -/* --------------- utility functions for vnodeops ---------------- */ - - -/* - * Given an inode, a uid, gid and prid make sure that we have - * allocated relevant dquot(s) on disk, and that we won't exceed inode - * quotas by creating this file. - * This also attaches dquot(s) to the given inode after locking it, - * and returns the dquots corresponding to the uid and/or gid. - * - * in : inode (unlocked) - * out : udquot, gdquot with references taken and unlocked - */ -int -xfs_qm_vop_dqalloc( - struct xfs_inode *ip, - uid_t uid, - gid_t gid, - prid_t prid, - uint flags, - struct xfs_dquot **O_udqpp, - struct xfs_dquot **O_gdqpp) -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_dquot *uq, *gq; - int error; - uint lockflags; - - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) - return 0; - - lockflags = XFS_ILOCK_EXCL; - xfs_ilock(ip, lockflags); - - if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip)) - gid = ip->i_d.di_gid; - - /* - * Attach the dquot(s) to this inode, doing a dquot allocation - * if necessary. The dquot(s) will not be locked. - */ - if (XFS_NOT_DQATTACHED(mp, ip)) { - error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC); - if (error) { - xfs_iunlock(ip, lockflags); - return error; - } - } - - uq = gq = NULL; - if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) { - if (ip->i_d.di_uid != uid) { - /* - * What we need is the dquot that has this uid, and - * if we send the inode to dqget, the uid of the inode - * takes priority over what's sent in the uid argument. - * We must unlock inode here before calling dqget if - * we're not sending the inode, because otherwise - * we'll deadlock by doing trans_reserve while - * holding ilock. - */ - xfs_iunlock(ip, lockflags); - if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid, - XFS_DQ_USER, - XFS_QMOPT_DQALLOC | - XFS_QMOPT_DOWARN, - &uq))) { - ASSERT(error != ENOENT); - return error; - } - /* - * Get the ilock in the right order. - */ - xfs_dqunlock(uq); - lockflags = XFS_ILOCK_SHARED; - xfs_ilock(ip, lockflags); - } else { - /* - * Take an extra reference, because we'll return - * this to caller - */ - ASSERT(ip->i_udquot); - uq = ip->i_udquot; - xfs_dqlock(uq); - XFS_DQHOLD(uq); - xfs_dqunlock(uq); - } - } - if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { - if (ip->i_d.di_gid != gid) { - xfs_iunlock(ip, lockflags); - if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid, - XFS_DQ_GROUP, - XFS_QMOPT_DQALLOC | - XFS_QMOPT_DOWARN, - &gq))) { - if (uq) - xfs_qm_dqrele(uq); - ASSERT(error != ENOENT); - return error; - } - xfs_dqunlock(gq); - lockflags = XFS_ILOCK_SHARED; - xfs_ilock(ip, lockflags); - } else { - ASSERT(ip->i_gdquot); - gq = ip->i_gdquot; - xfs_dqlock(gq); - XFS_DQHOLD(gq); - xfs_dqunlock(gq); - } - } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { - if (xfs_get_projid(ip) != prid) { - xfs_iunlock(ip, lockflags); - if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, - XFS_DQ_PROJ, - XFS_QMOPT_DQALLOC | - XFS_QMOPT_DOWARN, - &gq))) { - if (uq) - xfs_qm_dqrele(uq); - ASSERT(error != ENOENT); - return (error); - } - xfs_dqunlock(gq); - lockflags = XFS_ILOCK_SHARED; - xfs_ilock(ip, lockflags); - } else { - ASSERT(ip->i_gdquot); - gq = ip->i_gdquot; - xfs_dqlock(gq); - XFS_DQHOLD(gq); - xfs_dqunlock(gq); - } - } - if (uq) - trace_xfs_dquot_dqalloc(ip); - - xfs_iunlock(ip, lockflags); - if (O_udqpp) - *O_udqpp = uq; - else if (uq) - xfs_qm_dqrele(uq); - if (O_gdqpp) - *O_gdqpp = gq; - else if (gq) - xfs_qm_dqrele(gq); - return 0; -} - -/* - * Actually transfer ownership, and do dquot modifications. - * These were already reserved. - */ -xfs_dquot_t * -xfs_qm_vop_chown( - xfs_trans_t *tp, - xfs_inode_t *ip, - xfs_dquot_t **IO_olddq, - xfs_dquot_t *newdq) -{ - xfs_dquot_t *prevdq; - uint bfield = XFS_IS_REALTIME_INODE(ip) ? - XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; - - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); - - /* old dquot */ - prevdq = *IO_olddq; - ASSERT(prevdq); - ASSERT(prevdq != newdq); - - xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks)); - xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1); - - /* the sparkling new dquot */ - xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks); - xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1); - - /* - * Take an extra reference, because the inode - * is going to keep this dquot pointer even - * after the trans_commit. - */ - xfs_dqlock(newdq); - XFS_DQHOLD(newdq); - xfs_dqunlock(newdq); - *IO_olddq = newdq; - - return prevdq; -} - -/* - * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID). - */ -int -xfs_qm_vop_chown_reserve( - xfs_trans_t *tp, - xfs_inode_t *ip, - xfs_dquot_t *udqp, - xfs_dquot_t *gdqp, - uint flags) -{ - xfs_mount_t *mp = ip->i_mount; - uint delblks, blkflags, prjflags = 0; - xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; - int error; - - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - - delblks = ip->i_delayed_blks; - delblksudq = delblksgdq = unresudq = unresgdq = NULL; - blkflags = XFS_IS_REALTIME_INODE(ip) ? - XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS; - - if (XFS_IS_UQUOTA_ON(mp) && udqp && - ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) { - delblksudq = udqp; - /* - * If there are delayed allocation blocks, then we have to - * unreserve those from the old dquot, and add them to the - * new dquot. - */ - if (delblks) { - ASSERT(ip->i_udquot); - unresudq = ip->i_udquot; - } - } - if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { - if (XFS_IS_PQUOTA_ON(ip->i_mount) && - xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id)) - prjflags = XFS_QMOPT_ENOSPC; - - if (prjflags || - (XFS_IS_GQUOTA_ON(ip->i_mount) && - ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) { - delblksgdq = gdqp; - if (delblks) { - ASSERT(ip->i_gdquot); - unresgdq = ip->i_gdquot; - } - } - } - - if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, - delblksudq, delblksgdq, ip->i_d.di_nblocks, 1, - flags | blkflags | prjflags))) - return (error); - - /* - * Do the delayed blks reservations/unreservations now. Since, these - * are done without the help of a transaction, if a reservation fails - * its previous reservations won't be automatically undone by trans - * code. So, we have to do it manually here. - */ - if (delblks) { - /* - * Do the reservations first. Unreservation can't fail. - */ - ASSERT(delblksudq || delblksgdq); - ASSERT(unresudq || unresgdq); - if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, - delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0, - flags | blkflags | prjflags))) - return (error); - xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, - unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0, - blkflags); - } - - return (0); -} - -int -xfs_qm_vop_rename_dqattach( - struct xfs_inode **i_tab) -{ - struct xfs_mount *mp = i_tab[0]->i_mount; - int i; - - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) - return 0; - - for (i = 0; (i < 4 && i_tab[i]); i++) { - struct xfs_inode *ip = i_tab[i]; - int error; - - /* - * Watch out for duplicate entries in the table. - */ - if (i == 0 || ip != i_tab[i-1]) { - if (XFS_NOT_DQATTACHED(mp, ip)) { - error = xfs_qm_dqattach(ip, 0); - if (error) - return error; - } - } - } - return 0; -} - -void -xfs_qm_vop_create_dqattach( - struct xfs_trans *tp, - struct xfs_inode *ip, - struct xfs_dquot *udqp, - struct xfs_dquot *gdqp) -{ - struct xfs_mount *mp = tp->t_mountp; - - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) - return; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - - if (udqp) { - xfs_dqlock(udqp); - XFS_DQHOLD(udqp); - xfs_dqunlock(udqp); - ASSERT(ip->i_udquot == NULL); - ip->i_udquot = udqp; - ASSERT(XFS_IS_UQUOTA_ON(mp)); - ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); - xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); - } - if (gdqp) { - xfs_dqlock(gdqp); - XFS_DQHOLD(gdqp); - xfs_dqunlock(gdqp); - ASSERT(ip->i_gdquot == NULL); - ip->i_gdquot = gdqp; - ASSERT(XFS_IS_OQUOTA_ON(mp)); - ASSERT((XFS_IS_GQUOTA_ON(mp) ? - ip->i_d.di_gid : xfs_get_projid(ip)) == - be32_to_cpu(gdqp->q_core.d_id)); - xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); - } -} - diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h deleted file mode 100644 index 43b9abe..0000000 --- a/fs/xfs/quota/xfs_qm.h +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_QM_H__ -#define __XFS_QM_H__ - -#include "xfs_dquot_item.h" -#include "xfs_dquot.h" -#include "xfs_quota_priv.h" -#include "xfs_qm_stats.h" - -struct xfs_qm; -struct xfs_inode; - -extern uint ndquot; -extern struct mutex xfs_Gqm_lock; -extern struct xfs_qm *xfs_Gqm; -extern kmem_zone_t *qm_dqzone; -extern kmem_zone_t *qm_dqtrxzone; - -/* - * Used in xfs_qm_sync called by xfs_sync to count the max times that it can - * iterate over the mountpt's dquot list in one call. - */ -#define XFS_QM_SYNC_MAX_RESTARTS 7 - -/* - * Ditto, for xfs_qm_dqreclaim_one. - */ -#define XFS_QM_RECLAIM_MAX_RESTARTS 4 - -/* - * Ideal ratio of free to in use dquots. Quota manager makes an attempt - * to keep this balance. - */ -#define XFS_QM_DQFREE_RATIO 2 - -/* - * Dquot hashtable constants/threshold values. - */ -#define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t)) -#define XFS_QM_HASHSIZE_HIGH ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t)) - -/* - * This defines the unit of allocation of dquots. - * Currently, it is just one file system block, and a 4K blk contains 30 - * (136 * 30 = 4080) dquots. It's probably not worth trying to make - * this more dynamic. - * XXXsup However, if this number is changed, we have to make sure that we don't - * implicitly assume that we do allocations in chunks of a single filesystem - * block in the dquot/xqm code. - */ -#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 - -typedef xfs_dqhash_t xfs_dqlist_t; - -/* - * Quota Manager (global) structure. Lives only in core. - */ -typedef struct xfs_qm { - xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */ - xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */ - uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */ - struct list_head qm_dqfrlist; /* freelist of dquots */ - struct mutex qm_dqfrlist_lock; - int qm_dqfrlist_cnt; - atomic_t qm_totaldquots; /* total incore dquots */ - uint qm_nrefs; /* file systems with quota on */ - int qm_dqfree_ratio;/* ratio of free to inuse dquots */ - kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */ - kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */ -} xfs_qm_t; - -/* - * Various quota information for individual filesystems. - * The mount structure keeps a pointer to this. - */ -typedef struct xfs_quotainfo { - xfs_inode_t *qi_uquotaip; /* user quota inode */ - xfs_inode_t *qi_gquotaip; /* group quota inode */ - struct list_head qi_dqlist; /* all dquots in filesys */ - struct mutex qi_dqlist_lock; - int qi_dquots; - int qi_dqreclaims; /* a change here indicates - a removal in the dqlist */ - time_t qi_btimelimit; /* limit for blks timer */ - time_t qi_itimelimit; /* limit for inodes timer */ - time_t qi_rtbtimelimit;/* limit for rt blks timer */ - xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */ - xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */ - xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */ - struct mutex qi_quotaofflock;/* to serialize quotaoff */ - xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */ - uint qi_dqperchunk; /* # ondisk dqs in above chunk */ - xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */ - xfs_qcnt_t qi_bsoftlimit; /* default data blk soft limit */ - xfs_qcnt_t qi_ihardlimit; /* default inode count hard limit */ - xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */ - xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */ - xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */ -} xfs_quotainfo_t; - - -extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); -extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, - xfs_dquot_t *, xfs_dquot_t *, long, long, uint); -extern void xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *); -extern void xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *); - -/* - * We keep the usr and grp dquots separately so that locking will be easier - * to do at commit time. All transactions that we know of at this point - * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value. - */ -#define XFS_QM_TRANS_MAXDQS 2 -typedef struct xfs_dquot_acct { - xfs_dqtrx_t dqa_usrdquots[XFS_QM_TRANS_MAXDQS]; - xfs_dqtrx_t dqa_grpdquots[XFS_QM_TRANS_MAXDQS]; -} xfs_dquot_acct_t; - -/* - * Users are allowed to have a usage exceeding their softlimit for - * a period this long. - */ -#define XFS_QM_BTIMELIMIT (7 * 24*60*60) /* 1 week */ -#define XFS_QM_RTBTIMELIMIT (7 * 24*60*60) /* 1 week */ -#define XFS_QM_ITIMELIMIT (7 * 24*60*60) /* 1 week */ - -#define XFS_QM_BWARNLIMIT 5 -#define XFS_QM_IWARNLIMIT 5 -#define XFS_QM_RTBWARNLIMIT 5 - -extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); -extern int xfs_qm_quotacheck(xfs_mount_t *); -extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); - -/* dquot stuff */ -extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **); -extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); -extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); - -/* quota ops */ -extern int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint); -extern int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint, - fs_disk_quota_t *); -extern int xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint, - fs_disk_quota_t *); -extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); -extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); -extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); - -#endif /* __XFS_QM_H__ */ diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c deleted file mode 100644 index a0a829a..0000000 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_itable.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_qm.h" - - -STATIC void -xfs_fill_statvfs_from_dquot( - struct kstatfs *statp, - xfs_disk_dquot_t *dp) -{ - __uint64_t limit; - - limit = dp->d_blk_softlimit ? - be64_to_cpu(dp->d_blk_softlimit) : - be64_to_cpu(dp->d_blk_hardlimit); - if (limit && statp->f_blocks > limit) { - statp->f_blocks = limit; - statp->f_bfree = statp->f_bavail = - (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ? - (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0; - } - - limit = dp->d_ino_softlimit ? - be64_to_cpu(dp->d_ino_softlimit) : - be64_to_cpu(dp->d_ino_hardlimit); - if (limit && statp->f_files > limit) { - statp->f_files = limit; - statp->f_ffree = - (statp->f_files > be64_to_cpu(dp->d_icount)) ? - (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0; - } -} - - -/* - * Directory tree accounting is implemented using project quotas, where - * the project identifier is inherited from parent directories. - * A statvfs (df, etc.) of a directory that is using project quota should - * return a statvfs of the project, not the entire filesystem. - * This makes such trees appear as if they are filesystems in themselves. - */ -void -xfs_qm_statvfs( - xfs_inode_t *ip, - struct kstatfs *statp) -{ - xfs_mount_t *mp = ip->i_mount; - xfs_dquot_t *dqp; - - if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) { - xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); - xfs_qm_dqput(dqp); - } -} - -int -xfs_qm_newmount( - xfs_mount_t *mp, - uint *needquotamount, - uint *quotaflags) -{ - uint quotaondisk; - uint uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0; - - quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) && - (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT); - - if (quotaondisk) { - uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT; - pquotaondisk = mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT; - gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT; - } - - /* - * If the device itself is read-only, we can't allow - * the user to change the state of quota on the mount - - * this would generate a transaction on the ro device, - * which would lead to an I/O error and shutdown - */ - - if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) || - (!uquotaondisk && XFS_IS_UQUOTA_ON(mp)) || - (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) || - (!pquotaondisk && XFS_IS_PQUOTA_ON(mp)) || - (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || - (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) && - xfs_dev_is_read_only(mp, "changing quota state")) { - xfs_warn(mp, "please mount with%s%s%s%s.", - (!quotaondisk ? "out quota" : ""), - (uquotaondisk ? " usrquota" : ""), - (pquotaondisk ? " prjquota" : ""), - (gquotaondisk ? " grpquota" : "")); - return XFS_ERROR(EPERM); - } - - if (XFS_IS_QUOTA_ON(mp) || quotaondisk) { - /* - * Call mount_quotas at this point only if we won't have to do - * a quotacheck. - */ - if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) { - /* - * If an error occurred, qm_mount_quotas code - * has already disabled quotas. So, just finish - * mounting, and get on with the boring life - * without disk quotas. - */ - xfs_qm_mount_quotas(mp); - } else { - /* - * Clear the quota flags, but remember them. This - * is so that the quota code doesn't get invoked - * before we're ready. This can happen when an - * inode goes inactive and wants to free blocks, - * or via xfs_log_mount_finish. - */ - *needquotamount = B_TRUE; - *quotaflags = mp->m_qflags; - mp->m_qflags = 0; - } - } - - return 0; -} - -void __init -xfs_qm_init(void) -{ - printk(KERN_INFO "SGI XFS Quota Management subsystem\n"); - mutex_init(&xfs_Gqm_lock); - xfs_qm_init_procfs(); -} - -void __exit -xfs_qm_exit(void) -{ - xfs_qm_cleanup_procfs(); - if (qm_dqzone) - kmem_zone_destroy(qm_dqzone); - if (qm_dqtrxzone) - kmem_zone_destroy(qm_dqtrxzone); -} diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c deleted file mode 100644 index 8671a0b..0000000 --- a/fs/xfs/quota/xfs_qm_stats.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_itable.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_qm.h" - -struct xqmstats xqmstats; - -static int xqm_proc_show(struct seq_file *m, void *v) -{ - /* maximum; incore; ratio free to inuse; freelist */ - seq_printf(m, "%d\t%d\t%d\t%u\n", - ndquot, - xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, - xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, - xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0); - return 0; -} - -static int xqm_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, xqm_proc_show, NULL); -} - -static const struct file_operations xqm_proc_fops = { - .owner = THIS_MODULE, - .open = xqm_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int xqmstat_proc_show(struct seq_file *m, void *v) -{ - /* quota performance statistics */ - seq_printf(m, "qm %u %u %u %u %u %u %u %u\n", - xqmstats.xs_qm_dqreclaims, - xqmstats.xs_qm_dqreclaim_misses, - xqmstats.xs_qm_dquot_dups, - xqmstats.xs_qm_dqcachemisses, - xqmstats.xs_qm_dqcachehits, - xqmstats.xs_qm_dqwants, - xqmstats.xs_qm_dqshake_reclaims, - xqmstats.xs_qm_dqinact_reclaims); - return 0; -} - -static int xqmstat_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, xqmstat_proc_show, NULL); -} - -static const struct file_operations xqmstat_proc_fops = { - .owner = THIS_MODULE, - .open = xqmstat_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -void -xfs_qm_init_procfs(void) -{ - proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops); - proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops); -} - -void -xfs_qm_cleanup_procfs(void) -{ - remove_proc_entry("fs/xfs/xqm", NULL); - remove_proc_entry("fs/xfs/xqmstat", NULL); -} diff --git a/fs/xfs/quota/xfs_qm_stats.h b/fs/xfs/quota/xfs_qm_stats.h deleted file mode 100644 index 5b964fc..0000000 --- a/fs/xfs/quota/xfs_qm_stats.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2002 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_QM_STATS_H__ -#define __XFS_QM_STATS_H__ - -#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF) - -/* - * XQM global statistics - */ -struct xqmstats { - __uint32_t xs_qm_dqreclaims; - __uint32_t xs_qm_dqreclaim_misses; - __uint32_t xs_qm_dquot_dups; - __uint32_t xs_qm_dqcachemisses; - __uint32_t xs_qm_dqcachehits; - __uint32_t xs_qm_dqwants; - __uint32_t xs_qm_dqshake_reclaims; - __uint32_t xs_qm_dqinact_reclaims; -}; - -extern struct xqmstats xqmstats; - -# define XQM_STATS_INC(count) ( (count)++ ) - -extern void xfs_qm_init_procfs(void); -extern void xfs_qm_cleanup_procfs(void); - -#else - -# define XQM_STATS_INC(count) do { } while (0) - -static inline void xfs_qm_init_procfs(void) { }; -static inline void xfs_qm_cleanup_procfs(void) { }; - -#endif - -#endif /* __XFS_QM_STATS_H__ */ diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c deleted file mode 100644 index 609246f..0000000 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ /dev/null @@ -1,906 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include - -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_itable.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_utils.h" -#include "xfs_qm.h" -#include "xfs_trace.h" - -STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); -STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, - uint); -STATIC uint xfs_qm_export_flags(uint); -STATIC uint xfs_qm_export_qtype_flags(uint); -STATIC void xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *, - fs_disk_quota_t *); - - -/* - * Turn off quota accounting and/or enforcement for all udquots and/or - * gdquots. Called only at unmount time. - * - * This assumes that there are no dquots of this file system cached - * incore, and modifies the ondisk dquot directly. Therefore, for example, - * it is an error to call this twice, without purging the cache. - */ -int -xfs_qm_scall_quotaoff( - xfs_mount_t *mp, - uint flags) -{ - struct xfs_quotainfo *q = mp->m_quotainfo; - uint dqtype; - int error; - uint inactivate_flags; - xfs_qoff_logitem_t *qoffstart; - int nculprits; - - /* - * No file system can have quotas enabled on disk but not in core. - * Note that quota utilities (like quotaoff) _expect_ - * errno == EEXIST here. - */ - if ((mp->m_qflags & flags) == 0) - return XFS_ERROR(EEXIST); - error = 0; - - flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); - - /* - * We don't want to deal with two quotaoffs messing up each other, - * so we're going to serialize it. quotaoff isn't exactly a performance - * critical thing. - * If quotaoff, then we must be dealing with the root filesystem. - */ - ASSERT(q); - mutex_lock(&q->qi_quotaofflock); - - /* - * If we're just turning off quota enforcement, change mp and go. - */ - if ((flags & XFS_ALL_QUOTA_ACCT) == 0) { - mp->m_qflags &= ~(flags); - - spin_lock(&mp->m_sb_lock); - mp->m_sb.sb_qflags = mp->m_qflags; - spin_unlock(&mp->m_sb_lock); - mutex_unlock(&q->qi_quotaofflock); - - /* XXX what to do if error ? Revert back to old vals incore ? */ - error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS); - return (error); - } - - dqtype = 0; - inactivate_flags = 0; - /* - * If accounting is off, we must turn enforcement off, clear the - * quota 'CHKD' certificate to make it known that we have to - * do a quotacheck the next time this quota is turned on. - */ - if (flags & XFS_UQUOTA_ACCT) { - dqtype |= XFS_QMOPT_UQUOTA; - flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD); - inactivate_flags |= XFS_UQUOTA_ACTIVE; - } - if (flags & XFS_GQUOTA_ACCT) { - dqtype |= XFS_QMOPT_GQUOTA; - flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); - inactivate_flags |= XFS_GQUOTA_ACTIVE; - } else if (flags & XFS_PQUOTA_ACCT) { - dqtype |= XFS_QMOPT_PQUOTA; - flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); - inactivate_flags |= XFS_PQUOTA_ACTIVE; - } - - /* - * Nothing to do? Don't complain. This happens when we're just - * turning off quota enforcement. - */ - if ((mp->m_qflags & flags) == 0) - goto out_unlock; - - /* - * Write the LI_QUOTAOFF log record, and do SB changes atomically, - * and synchronously. If we fail to write, we should abort the - * operation as it cannot be recovered safely if we crash. - */ - error = xfs_qm_log_quotaoff(mp, &qoffstart, flags); - if (error) - goto out_unlock; - - /* - * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct - * to take care of the race between dqget and quotaoff. We don't take - * any special locks to reset these bits. All processes need to check - * these bits *after* taking inode lock(s) to see if the particular - * quota type is in the process of being turned off. If *ACTIVE, it is - * guaranteed that all dquot structures and all quotainode ptrs will all - * stay valid as long as that inode is kept locked. - * - * There is no turning back after this. - */ - mp->m_qflags &= ~inactivate_flags; - - /* - * Give back all the dquot reference(s) held by inodes. - * Here we go thru every single incore inode in this file system, and - * do a dqrele on the i_udquot/i_gdquot that it may have. - * Essentially, as long as somebody has an inode locked, this guarantees - * that quotas will not be turned off. This is handy because in a - * transaction once we lock the inode(s) and check for quotaon, we can - * depend on the quota inodes (and other things) being valid as long as - * we keep the lock(s). - */ - xfs_qm_dqrele_all_inodes(mp, flags); - - /* - * Next we make the changes in the quota flag in the mount struct. - * This isn't protected by a particular lock directly, because we - * don't want to take a mrlock every time we depend on quotas being on. - */ - mp->m_qflags &= ~(flags); - - /* - * Go through all the dquots of this file system and purge them, - * according to what was turned off. We may not be able to get rid - * of all dquots, because dquots can have temporary references that - * are not attached to inodes. eg. xfs_setattr, xfs_create. - * So, if we couldn't purge all the dquots from the filesystem, - * we can't get rid of the incore data structures. - */ - while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype))) - delay(10 * nculprits); - - /* - * Transactions that had started before ACTIVE state bit was cleared - * could have logged many dquots, so they'd have higher LSNs than - * the first QUOTAOFF log record does. If we happen to crash when - * the tail of the log has gone past the QUOTAOFF record, but - * before the last dquot modification, those dquots __will__ - * recover, and that's not good. - * - * So, we have QUOTAOFF start and end logitems; the start - * logitem won't get overwritten until the end logitem appears... - */ - error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags); - if (error) { - /* We're screwed now. Shutdown is the only option. */ - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - goto out_unlock; - } - - /* - * If quotas is completely disabled, close shop. - */ - if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) || - ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) { - mutex_unlock(&q->qi_quotaofflock); - xfs_qm_destroy_quotainfo(mp); - return (0); - } - - /* - * Release our quotainode references if we don't need them anymore. - */ - if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) { - IRELE(q->qi_uquotaip); - q->qi_uquotaip = NULL; - } - if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) { - IRELE(q->qi_gquotaip); - q->qi_gquotaip = NULL; - } - -out_unlock: - mutex_unlock(&q->qi_quotaofflock); - return error; -} - -STATIC int -xfs_qm_scall_trunc_qfile( - struct xfs_mount *mp, - xfs_ino_t ino) -{ - struct xfs_inode *ip; - struct xfs_trans *tp; - int error; - - if (ino == NULLFSINO) - return 0; - - error = xfs_iget(mp, NULL, ino, 0, 0, &ip); - if (error) - return error; - - xfs_ilock(ip, XFS_IOLOCK_EXCL); - - tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE); - error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, - XFS_ITRUNCATE_LOG_COUNT); - if (error) { - xfs_trans_cancel(tp, 0); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - goto out_put; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip); - - error = xfs_itruncate_data(&tp, ip, 0); - if (error) { - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | - XFS_TRANS_ABORT); - goto out_unlock; - } - - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - -out_unlock: - xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); -out_put: - IRELE(ip); - return error; -} - -int -xfs_qm_scall_trunc_qfiles( - xfs_mount_t *mp, - uint flags) -{ - int error = 0, error2 = 0; - - if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { - xfs_debug(mp, "%s: flags=%x m_qflags=%x\n", - __func__, flags, mp->m_qflags); - return XFS_ERROR(EINVAL); - } - - if (flags & XFS_DQ_USER) - error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino); - if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) - error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino); - - return error ? error : error2; -} - -/* - * Switch on (a given) quota enforcement for a filesystem. This takes - * effect immediately. - * (Switching on quota accounting must be done at mount time.) - */ -int -xfs_qm_scall_quotaon( - xfs_mount_t *mp, - uint flags) -{ - int error; - uint qf; - __int64_t sbflags; - - flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); - /* - * Switching on quota accounting must be done at mount time. - */ - flags &= ~(XFS_ALL_QUOTA_ACCT); - - sbflags = 0; - - if (flags == 0) { - xfs_debug(mp, "%s: zero flags, m_qflags=%x\n", - __func__, mp->m_qflags); - return XFS_ERROR(EINVAL); - } - - /* No fs can turn on quotas with a delayed effect */ - ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0); - - /* - * Can't enforce without accounting. We check the superblock - * qflags here instead of m_qflags because rootfs can have - * quota acct on ondisk without m_qflags' knowing. - */ - if (((flags & XFS_UQUOTA_ACCT) == 0 && - (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 && - (flags & XFS_UQUOTA_ENFD)) - || - ((flags & XFS_PQUOTA_ACCT) == 0 && - (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && - (flags & XFS_GQUOTA_ACCT) == 0 && - (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && - (flags & XFS_OQUOTA_ENFD))) { - xfs_debug(mp, - "%s: Can't enforce without acct, flags=%x sbflags=%x\n", - __func__, flags, mp->m_sb.sb_qflags); - return XFS_ERROR(EINVAL); - } - /* - * If everything's up to-date incore, then don't waste time. - */ - if ((mp->m_qflags & flags) == flags) - return XFS_ERROR(EEXIST); - - /* - * Change sb_qflags on disk but not incore mp->qflags - * if this is the root filesystem. - */ - spin_lock(&mp->m_sb_lock); - qf = mp->m_sb.sb_qflags; - mp->m_sb.sb_qflags = qf | flags; - spin_unlock(&mp->m_sb_lock); - - /* - * There's nothing to change if it's the same. - */ - if ((qf & flags) == flags && sbflags == 0) - return XFS_ERROR(EEXIST); - sbflags |= XFS_SB_QFLAGS; - - if ((error = xfs_qm_write_sb_changes(mp, sbflags))) - return (error); - /* - * If we aren't trying to switch on quota enforcement, we are done. - */ - if (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) != - (mp->m_qflags & XFS_UQUOTA_ACCT)) || - ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) != - (mp->m_qflags & XFS_PQUOTA_ACCT)) || - ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) != - (mp->m_qflags & XFS_GQUOTA_ACCT)) || - (flags & XFS_ALL_QUOTA_ENFD) == 0) - return (0); - - if (! XFS_IS_QUOTA_RUNNING(mp)) - return XFS_ERROR(ESRCH); - - /* - * Switch on quota enforcement in core. - */ - mutex_lock(&mp->m_quotainfo->qi_quotaofflock); - mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD); - mutex_unlock(&mp->m_quotainfo->qi_quotaofflock); - - return (0); -} - - -/* - * Return quota status information, such as uquota-off, enforcements, etc. - */ -int -xfs_qm_scall_getqstat( - struct xfs_mount *mp, - struct fs_quota_stat *out) -{ - struct xfs_quotainfo *q = mp->m_quotainfo; - struct xfs_inode *uip, *gip; - boolean_t tempuqip, tempgqip; - - uip = gip = NULL; - tempuqip = tempgqip = B_FALSE; - memset(out, 0, sizeof(fs_quota_stat_t)); - - out->qs_version = FS_QSTAT_VERSION; - if (!xfs_sb_version_hasquota(&mp->m_sb)) { - out->qs_uquota.qfs_ino = NULLFSINO; - out->qs_gquota.qfs_ino = NULLFSINO; - return (0); - } - out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & - (XFS_ALL_QUOTA_ACCT| - XFS_ALL_QUOTA_ENFD)); - out->qs_pad = 0; - out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino; - out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino; - - if (q) { - uip = q->qi_uquotaip; - gip = q->qi_gquotaip; - } - if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { - if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, - 0, 0, &uip) == 0) - tempuqip = B_TRUE; - } - if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { - if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, - 0, 0, &gip) == 0) - tempgqip = B_TRUE; - } - if (uip) { - out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks; - out->qs_uquota.qfs_nextents = uip->i_d.di_nextents; - if (tempuqip) - IRELE(uip); - } - if (gip) { - out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks; - out->qs_gquota.qfs_nextents = gip->i_d.di_nextents; - if (tempgqip) - IRELE(gip); - } - if (q) { - out->qs_incoredqs = q->qi_dquots; - out->qs_btimelimit = q->qi_btimelimit; - out->qs_itimelimit = q->qi_itimelimit; - out->qs_rtbtimelimit = q->qi_rtbtimelimit; - out->qs_bwarnlimit = q->qi_bwarnlimit; - out->qs_iwarnlimit = q->qi_iwarnlimit; - } - return 0; -} - -#define XFS_DQ_MASK \ - (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK) - -/* - * Adjust quota limits, and start/stop timers accordingly. - */ -int -xfs_qm_scall_setqlim( - xfs_mount_t *mp, - xfs_dqid_t id, - uint type, - fs_disk_quota_t *newlim) -{ - struct xfs_quotainfo *q = mp->m_quotainfo; - xfs_disk_dquot_t *ddq; - xfs_dquot_t *dqp; - xfs_trans_t *tp; - int error; - xfs_qcnt_t hard, soft; - - if (newlim->d_fieldmask & ~XFS_DQ_MASK) - return EINVAL; - if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0) - return 0; - - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); - if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, - 0, 0, XFS_DEFAULT_LOG_COUNT))) { - xfs_trans_cancel(tp, 0); - return (error); - } - - /* - * We don't want to race with a quotaoff so take the quotaoff lock. - * (We don't hold an inode lock, so there's nothing else to stop - * a quotaoff from happening). (XXXThis doesn't currently happen - * because we take the vfslock before calling xfs_qm_sysent). - */ - mutex_lock(&q->qi_quotaofflock); - - /* - * Get the dquot (locked), and join it to the transaction. - * Allocate the dquot if this doesn't exist. - */ - if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) { - xfs_trans_cancel(tp, XFS_TRANS_ABORT); - ASSERT(error != ENOENT); - goto out_unlock; - } - xfs_trans_dqjoin(tp, dqp); - ddq = &dqp->q_core; - - /* - * Make sure that hardlimits are >= soft limits before changing. - */ - hard = (newlim->d_fieldmask & FS_DQ_BHARD) ? - (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) : - be64_to_cpu(ddq->d_blk_hardlimit); - soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ? - (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) : - be64_to_cpu(ddq->d_blk_softlimit); - if (hard == 0 || hard >= soft) { - ddq->d_blk_hardlimit = cpu_to_be64(hard); - ddq->d_blk_softlimit = cpu_to_be64(soft); - if (id == 0) { - q->qi_bhardlimit = hard; - q->qi_bsoftlimit = soft; - } - } else { - xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft); - } - hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? - (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : - be64_to_cpu(ddq->d_rtb_hardlimit); - soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ? - (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) : - be64_to_cpu(ddq->d_rtb_softlimit); - if (hard == 0 || hard >= soft) { - ddq->d_rtb_hardlimit = cpu_to_be64(hard); - ddq->d_rtb_softlimit = cpu_to_be64(soft); - if (id == 0) { - q->qi_rtbhardlimit = hard; - q->qi_rtbsoftlimit = soft; - } - } else { - xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft); - } - - hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? - (xfs_qcnt_t) newlim->d_ino_hardlimit : - be64_to_cpu(ddq->d_ino_hardlimit); - soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ? - (xfs_qcnt_t) newlim->d_ino_softlimit : - be64_to_cpu(ddq->d_ino_softlimit); - if (hard == 0 || hard >= soft) { - ddq->d_ino_hardlimit = cpu_to_be64(hard); - ddq->d_ino_softlimit = cpu_to_be64(soft); - if (id == 0) { - q->qi_ihardlimit = hard; - q->qi_isoftlimit = soft; - } - } else { - xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft); - } - - /* - * Update warnings counter(s) if requested - */ - if (newlim->d_fieldmask & FS_DQ_BWARNS) - ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns); - if (newlim->d_fieldmask & FS_DQ_IWARNS) - ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns); - if (newlim->d_fieldmask & FS_DQ_RTBWARNS) - ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns); - - if (id == 0) { - /* - * Timelimits for the super user set the relative time - * the other users can be over quota for this file system. - * If it is zero a default is used. Ditto for the default - * soft and hard limit values (already done, above), and - * for warnings. - */ - if (newlim->d_fieldmask & FS_DQ_BTIMER) { - q->qi_btimelimit = newlim->d_btimer; - ddq->d_btimer = cpu_to_be32(newlim->d_btimer); - } - if (newlim->d_fieldmask & FS_DQ_ITIMER) { - q->qi_itimelimit = newlim->d_itimer; - ddq->d_itimer = cpu_to_be32(newlim->d_itimer); - } - if (newlim->d_fieldmask & FS_DQ_RTBTIMER) { - q->qi_rtbtimelimit = newlim->d_rtbtimer; - ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer); - } - if (newlim->d_fieldmask & FS_DQ_BWARNS) - q->qi_bwarnlimit = newlim->d_bwarns; - if (newlim->d_fieldmask & FS_DQ_IWARNS) - q->qi_iwarnlimit = newlim->d_iwarns; - if (newlim->d_fieldmask & FS_DQ_RTBWARNS) - q->qi_rtbwarnlimit = newlim->d_rtbwarns; - } else { - /* - * If the user is now over quota, start the timelimit. - * The user will not be 'warned'. - * Note that we keep the timers ticking, whether enforcement - * is on or off. We don't really want to bother with iterating - * over all ondisk dquots and turning the timers on/off. - */ - xfs_qm_adjust_dqtimers(mp, ddq); - } - dqp->dq_flags |= XFS_DQ_DIRTY; - xfs_trans_log_dquot(tp, dqp); - - error = xfs_trans_commit(tp, 0); - xfs_qm_dqrele(dqp); - - out_unlock: - mutex_unlock(&q->qi_quotaofflock); - return error; -} - -int -xfs_qm_scall_getquota( - xfs_mount_t *mp, - xfs_dqid_t id, - uint type, - fs_disk_quota_t *out) -{ - xfs_dquot_t *dqp; - int error; - - /* - * Try to get the dquot. We don't want it allocated on disk, so - * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't - * exist, we'll get ENOENT back. - */ - if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) { - return (error); - } - - /* - * If everything's NULL, this dquot doesn't quite exist as far as - * our utility programs are concerned. - */ - if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) { - xfs_qm_dqput(dqp); - return XFS_ERROR(ENOENT); - } - /* - * Convert the disk dquot to the exportable format - */ - xfs_qm_export_dquot(mp, &dqp->q_core, out); - xfs_qm_dqput(dqp); - return (error ? XFS_ERROR(EFAULT) : 0); -} - - -STATIC int -xfs_qm_log_quotaoff_end( - xfs_mount_t *mp, - xfs_qoff_logitem_t *startqoff, - uint flags) -{ - xfs_trans_t *tp; - int error; - xfs_qoff_logitem_t *qoffi; - - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END); - - if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2, - 0, 0, XFS_DEFAULT_LOG_COUNT))) { - xfs_trans_cancel(tp, 0); - return (error); - } - - qoffi = xfs_trans_get_qoff_item(tp, startqoff, - flags & XFS_ALL_QUOTA_ACCT); - xfs_trans_log_quotaoff_item(tp, qoffi); - - /* - * We have to make sure that the transaction is secure on disk before we - * return and actually stop quota accounting. So, make it synchronous. - * We don't care about quotoff's performance. - */ - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); - return (error); -} - - -STATIC int -xfs_qm_log_quotaoff( - xfs_mount_t *mp, - xfs_qoff_logitem_t **qoffstartp, - uint flags) -{ - xfs_trans_t *tp; - int error; - xfs_qoff_logitem_t *qoffi=NULL; - uint oldsbqflag=0; - - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF); - if ((error = xfs_trans_reserve(tp, 0, - sizeof(xfs_qoff_logitem_t) * 2 + - mp->m_sb.sb_sectsize + 128, - 0, - 0, - XFS_DEFAULT_LOG_COUNT))) { - goto error0; - } - - qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); - xfs_trans_log_quotaoff_item(tp, qoffi); - - spin_lock(&mp->m_sb_lock); - oldsbqflag = mp->m_sb.sb_qflags; - mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; - spin_unlock(&mp->m_sb_lock); - - xfs_mod_sb(tp, XFS_SB_QFLAGS); - - /* - * We have to make sure that the transaction is secure on disk before we - * return and actually stop quota accounting. So, make it synchronous. - * We don't care about quotoff's performance. - */ - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); - -error0: - if (error) { - xfs_trans_cancel(tp, 0); - /* - * No one else is modifying sb_qflags, so this is OK. - * We still hold the quotaofflock. - */ - spin_lock(&mp->m_sb_lock); - mp->m_sb.sb_qflags = oldsbqflag; - spin_unlock(&mp->m_sb_lock); - } - *qoffstartp = qoffi; - return (error); -} - - -/* - * Translate an internal style on-disk-dquot to the exportable format. - * The main differences are that the counters/limits are all in Basic - * Blocks (BBs) instead of the internal FSBs, and all on-disk data has - * to be converted to the native endianness. - */ -STATIC void -xfs_qm_export_dquot( - xfs_mount_t *mp, - xfs_disk_dquot_t *src, - struct fs_disk_quota *dst) -{ - memset(dst, 0, sizeof(*dst)); - dst->d_version = FS_DQUOT_VERSION; /* different from src->d_version */ - dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags); - dst->d_id = be32_to_cpu(src->d_id); - dst->d_blk_hardlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit)); - dst->d_blk_softlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit)); - dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit); - dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit); - dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount)); - dst->d_icount = be64_to_cpu(src->d_icount); - dst->d_btimer = be32_to_cpu(src->d_btimer); - dst->d_itimer = be32_to_cpu(src->d_itimer); - dst->d_iwarns = be16_to_cpu(src->d_iwarns); - dst->d_bwarns = be16_to_cpu(src->d_bwarns); - dst->d_rtb_hardlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit)); - dst->d_rtb_softlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit)); - dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount)); - dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer); - dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns); - - /* - * Internally, we don't reset all the timers when quota enforcement - * gets turned off. No need to confuse the user level code, - * so return zeroes in that case. - */ - if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) || - (!XFS_IS_OQUOTA_ENFORCED(mp) && - (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) { - dst->d_btimer = 0; - dst->d_itimer = 0; - dst->d_rtbtimer = 0; - } - -#ifdef DEBUG - if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) || - (XFS_IS_OQUOTA_ENFORCED(mp) && - (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) && - dst->d_id != 0) { - if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) && - (dst->d_blk_softlimit > 0)) { - ASSERT(dst->d_btimer != 0); - } - if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) && - (dst->d_ino_softlimit > 0)) { - ASSERT(dst->d_itimer != 0); - } - } -#endif -} - -STATIC uint -xfs_qm_export_qtype_flags( - uint flags) -{ - /* - * Can't be more than one, or none. - */ - ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) != - (FS_PROJ_QUOTA | FS_USER_QUOTA)); - ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) != - (FS_PROJ_QUOTA | FS_GROUP_QUOTA)); - ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) != - (FS_USER_QUOTA | FS_GROUP_QUOTA)); - ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0); - - return (flags & XFS_DQ_USER) ? - FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ? - FS_PROJ_QUOTA : FS_GROUP_QUOTA; -} - -STATIC uint -xfs_qm_export_flags( - uint flags) -{ - uint uflags; - - uflags = 0; - if (flags & XFS_UQUOTA_ACCT) - uflags |= FS_QUOTA_UDQ_ACCT; - if (flags & XFS_PQUOTA_ACCT) - uflags |= FS_QUOTA_PDQ_ACCT; - if (flags & XFS_GQUOTA_ACCT) - uflags |= FS_QUOTA_GDQ_ACCT; - if (flags & XFS_UQUOTA_ENFD) - uflags |= FS_QUOTA_UDQ_ENFD; - if (flags & (XFS_OQUOTA_ENFD)) { - uflags |= (flags & XFS_GQUOTA_ACCT) ? - FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD; - } - return (uflags); -} - - -STATIC int -xfs_dqrele_inode( - struct xfs_inode *ip, - struct xfs_perag *pag, - int flags) -{ - /* skip quota inodes */ - if (ip == ip->i_mount->m_quotainfo->qi_uquotaip || - ip == ip->i_mount->m_quotainfo->qi_gquotaip) { - ASSERT(ip->i_udquot == NULL); - ASSERT(ip->i_gdquot == NULL); - return 0; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { - xfs_qm_dqrele(ip->i_udquot); - ip->i_udquot = NULL; - } - if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) { - xfs_qm_dqrele(ip->i_gdquot); - ip->i_gdquot = NULL; - } - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return 0; -} - - -/* - * Go thru all the inodes in the file system, releasing their dquots. - * - * Note that the mount structure gets modified to indicate that quotas are off - * AFTER this, in the case of quotaoff. - */ -void -xfs_qm_dqrele_all_inodes( - struct xfs_mount *mp, - uint flags) -{ - ASSERT(mp->m_quotainfo); - xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags); -} diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h deleted file mode 100644 index 94a3d92..0000000 --- a/fs/xfs/quota/xfs_quota_priv.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_QUOTA_PRIV_H__ -#define __XFS_QUOTA_PRIV_H__ - -/* - * Number of bmaps that we ask from bmapi when doing a quotacheck. - * We make this restriction to keep the memory usage to a minimum. - */ -#define XFS_DQITER_MAP_SIZE 10 - -/* - * Hash into a bucket in the dquot hash table, based on . - */ -#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \ - (__psunsigned_t)(id)) & \ - (xfs_Gqm->qm_dqhashmask - 1)) -#define XFS_DQ_HASH(mp, id, type) (type == XFS_DQ_USER ? \ - (xfs_Gqm->qm_usr_dqhtable + \ - XFS_DQ_HASHVAL(mp, id)) : \ - (xfs_Gqm->qm_grp_dqhtable + \ - XFS_DQ_HASHVAL(mp, id))) -#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ - !dqp->q_core.d_blk_hardlimit && \ - !dqp->q_core.d_blk_softlimit && \ - !dqp->q_core.d_rtb_hardlimit && \ - !dqp->q_core.d_rtb_softlimit && \ - !dqp->q_core.d_ino_hardlimit && \ - !dqp->q_core.d_ino_softlimit && \ - !dqp->q_core.d_bcount && \ - !dqp->q_core.d_rtbcount && \ - !dqp->q_core.d_icount) - -#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ - (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ - (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) - -#endif /* __XFS_QUOTA_PRIV_H__ */ diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c deleted file mode 100644 index 4d00ee6..0000000 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ /dev/null @@ -1,890 +0,0 @@ -/* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_itable.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_trans_priv.h" -#include "xfs_qm.h" - -STATIC void xfs_trans_alloc_dqinfo(xfs_trans_t *); - -/* - * Add the locked dquot to the transaction. - * The dquot must be locked, and it cannot be associated with any - * transaction. - */ -void -xfs_trans_dqjoin( - xfs_trans_t *tp, - xfs_dquot_t *dqp) -{ - ASSERT(dqp->q_transp != tp); - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - ASSERT(dqp->q_logitem.qli_dquot == dqp); - - /* - * Get a log_item_desc to point at the new item. - */ - xfs_trans_add_item(tp, &dqp->q_logitem.qli_item); - - /* - * Initialize d_transp so we can later determine if this dquot is - * associated with this transaction. - */ - dqp->q_transp = tp; -} - - -/* - * This is called to mark the dquot as needing - * to be logged when the transaction is committed. The dquot must - * already be associated with the given transaction. - * Note that it marks the entire transaction as dirty. In the ordinary - * case, this gets called via xfs_trans_commit, after the transaction - * is already dirty. However, there's nothing stop this from getting - * called directly, as done by xfs_qm_scall_setqlim. Hence, the TRANS_DIRTY - * flag. - */ -void -xfs_trans_log_dquot( - xfs_trans_t *tp, - xfs_dquot_t *dqp) -{ - ASSERT(dqp->q_transp == tp); - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - - tp->t_flags |= XFS_TRANS_DIRTY; - dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY; -} - -/* - * Carry forward whatever is left of the quota blk reservation to - * the spanky new transaction - */ -void -xfs_trans_dup_dqinfo( - xfs_trans_t *otp, - xfs_trans_t *ntp) -{ - xfs_dqtrx_t *oq, *nq; - int i,j; - xfs_dqtrx_t *oqa, *nqa; - - if (!otp->t_dqinfo) - return; - - xfs_trans_alloc_dqinfo(ntp); - oqa = otp->t_dqinfo->dqa_usrdquots; - nqa = ntp->t_dqinfo->dqa_usrdquots; - - /* - * Because the quota blk reservation is carried forward, - * it is also necessary to carry forward the DQ_DIRTY flag. - */ - if(otp->t_flags & XFS_TRANS_DQ_DIRTY) - ntp->t_flags |= XFS_TRANS_DQ_DIRTY; - - for (j = 0; j < 2; j++) { - for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { - if (oqa[i].qt_dquot == NULL) - break; - oq = &oqa[i]; - nq = &nqa[i]; - - nq->qt_dquot = oq->qt_dquot; - nq->qt_bcount_delta = nq->qt_icount_delta = 0; - nq->qt_rtbcount_delta = 0; - - /* - * Transfer whatever is left of the reservations. - */ - nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used; - oq->qt_blk_res = oq->qt_blk_res_used; - - nq->qt_rtblk_res = oq->qt_rtblk_res - - oq->qt_rtblk_res_used; - oq->qt_rtblk_res = oq->qt_rtblk_res_used; - - nq->qt_ino_res = oq->qt_ino_res - oq->qt_ino_res_used; - oq->qt_ino_res = oq->qt_ino_res_used; - - } - oqa = otp->t_dqinfo->dqa_grpdquots; - nqa = ntp->t_dqinfo->dqa_grpdquots; - } -} - -/* - * Wrap around mod_dquot to account for both user and group quotas. - */ -void -xfs_trans_mod_dquot_byino( - xfs_trans_t *tp, - xfs_inode_t *ip, - uint field, - long delta) -{ - xfs_mount_t *mp = tp->t_mountp; - - if (!XFS_IS_QUOTA_RUNNING(mp) || - !XFS_IS_QUOTA_ON(mp) || - ip->i_ino == mp->m_sb.sb_uquotino || - ip->i_ino == mp->m_sb.sb_gquotino) - return; - - if (tp->t_dqinfo == NULL) - xfs_trans_alloc_dqinfo(tp); - - if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) - (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta); - if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot) - (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta); -} - -STATIC xfs_dqtrx_t * -xfs_trans_get_dqtrx( - xfs_trans_t *tp, - xfs_dquot_t *dqp) -{ - int i; - xfs_dqtrx_t *qa; - - qa = XFS_QM_ISUDQ(dqp) ? - tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots; - - for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { - if (qa[i].qt_dquot == NULL || - qa[i].qt_dquot == dqp) - return &qa[i]; - } - - return NULL; -} - -/* - * Make the changes in the transaction structure. - * The moral equivalent to xfs_trans_mod_sb(). - * We don't touch any fields in the dquot, so we don't care - * if it's locked or not (most of the time it won't be). - */ -void -xfs_trans_mod_dquot( - xfs_trans_t *tp, - xfs_dquot_t *dqp, - uint field, - long delta) -{ - xfs_dqtrx_t *qtrx; - - ASSERT(tp); - ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); - qtrx = NULL; - - if (tp->t_dqinfo == NULL) - xfs_trans_alloc_dqinfo(tp); - /* - * Find either the first free slot or the slot that belongs - * to this dquot. - */ - qtrx = xfs_trans_get_dqtrx(tp, dqp); - ASSERT(qtrx); - if (qtrx->qt_dquot == NULL) - qtrx->qt_dquot = dqp; - - switch (field) { - - /* - * regular disk blk reservation - */ - case XFS_TRANS_DQ_RES_BLKS: - qtrx->qt_blk_res += (ulong)delta; - break; - - /* - * inode reservation - */ - case XFS_TRANS_DQ_RES_INOS: - qtrx->qt_ino_res += (ulong)delta; - break; - - /* - * disk blocks used. - */ - case XFS_TRANS_DQ_BCOUNT: - if (qtrx->qt_blk_res && delta > 0) { - qtrx->qt_blk_res_used += (ulong)delta; - ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used); - } - qtrx->qt_bcount_delta += delta; - break; - - case XFS_TRANS_DQ_DELBCOUNT: - qtrx->qt_delbcnt_delta += delta; - break; - - /* - * Inode Count - */ - case XFS_TRANS_DQ_ICOUNT: - if (qtrx->qt_ino_res && delta > 0) { - qtrx->qt_ino_res_used += (ulong)delta; - ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used); - } - qtrx->qt_icount_delta += delta; - break; - - /* - * rtblk reservation - */ - case XFS_TRANS_DQ_RES_RTBLKS: - qtrx->qt_rtblk_res += (ulong)delta; - break; - - /* - * rtblk count - */ - case XFS_TRANS_DQ_RTBCOUNT: - if (qtrx->qt_rtblk_res && delta > 0) { - qtrx->qt_rtblk_res_used += (ulong)delta; - ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used); - } - qtrx->qt_rtbcount_delta += delta; - break; - - case XFS_TRANS_DQ_DELRTBCOUNT: - qtrx->qt_delrtb_delta += delta; - break; - - default: - ASSERT(0); - } - tp->t_flags |= XFS_TRANS_DQ_DIRTY; -} - - -/* - * Given an array of dqtrx structures, lock all the dquots associated - * and join them to the transaction, provided they have been modified. - * We know that the highest number of dquots (of one type - usr OR grp), - * involved in a transaction is 2 and that both usr and grp combined - 3. - * So, we don't attempt to make this very generic. - */ -STATIC void -xfs_trans_dqlockedjoin( - xfs_trans_t *tp, - xfs_dqtrx_t *q) -{ - ASSERT(q[0].qt_dquot != NULL); - if (q[1].qt_dquot == NULL) { - xfs_dqlock(q[0].qt_dquot); - xfs_trans_dqjoin(tp, q[0].qt_dquot); - } else { - ASSERT(XFS_QM_TRANS_MAXDQS == 2); - xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot); - xfs_trans_dqjoin(tp, q[0].qt_dquot); - xfs_trans_dqjoin(tp, q[1].qt_dquot); - } -} - - -/* - * Called by xfs_trans_commit() and similar in spirit to - * xfs_trans_apply_sb_deltas(). - * Go thru all the dquots belonging to this transaction and modify the - * INCORE dquot to reflect the actual usages. - * Unreserve just the reservations done by this transaction. - * dquot is still left locked at exit. - */ -void -xfs_trans_apply_dquot_deltas( - xfs_trans_t *tp) -{ - int i, j; - xfs_dquot_t *dqp; - xfs_dqtrx_t *qtrx, *qa; - xfs_disk_dquot_t *d; - long totalbdelta; - long totalrtbdelta; - - if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY)) - return; - - ASSERT(tp->t_dqinfo); - qa = tp->t_dqinfo->dqa_usrdquots; - for (j = 0; j < 2; j++) { - if (qa[0].qt_dquot == NULL) { - qa = tp->t_dqinfo->dqa_grpdquots; - continue; - } - - /* - * Lock all of the dquots and join them to the transaction. - */ - xfs_trans_dqlockedjoin(tp, qa); - - for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { - qtrx = &qa[i]; - /* - * The array of dquots is filled - * sequentially, not sparsely. - */ - if ((dqp = qtrx->qt_dquot) == NULL) - break; - - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - ASSERT(dqp->q_transp == tp); - - /* - * adjust the actual number of blocks used - */ - d = &dqp->q_core; - - /* - * The issue here is - sometimes we don't make a blkquota - * reservation intentionally to be fair to users - * (when the amount is small). On the other hand, - * delayed allocs do make reservations, but that's - * outside of a transaction, so we have no - * idea how much was really reserved. - * So, here we've accumulated delayed allocation blks and - * non-delay blks. The assumption is that the - * delayed ones are always reserved (outside of a - * transaction), and the others may or may not have - * quota reservations. - */ - totalbdelta = qtrx->qt_bcount_delta + - qtrx->qt_delbcnt_delta; - totalrtbdelta = qtrx->qt_rtbcount_delta + - qtrx->qt_delrtb_delta; -#ifdef DEBUG - if (totalbdelta < 0) - ASSERT(be64_to_cpu(d->d_bcount) >= - -totalbdelta); - - if (totalrtbdelta < 0) - ASSERT(be64_to_cpu(d->d_rtbcount) >= - -totalrtbdelta); - - if (qtrx->qt_icount_delta < 0) - ASSERT(be64_to_cpu(d->d_icount) >= - -qtrx->qt_icount_delta); -#endif - if (totalbdelta) - be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta); - - if (qtrx->qt_icount_delta) - be64_add_cpu(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta); - - if (totalrtbdelta) - be64_add_cpu(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta); - - /* - * Get any default limits in use. - * Start/reset the timer(s) if needed. - */ - if (d->d_id) { - xfs_qm_adjust_dqlimits(tp->t_mountp, d); - xfs_qm_adjust_dqtimers(tp->t_mountp, d); - } - - dqp->dq_flags |= XFS_DQ_DIRTY; - /* - * add this to the list of items to get logged - */ - xfs_trans_log_dquot(tp, dqp); - /* - * Take off what's left of the original reservation. - * In case of delayed allocations, there's no - * reservation that a transaction structure knows of. - */ - if (qtrx->qt_blk_res != 0) { - if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) { - if (qtrx->qt_blk_res > - qtrx->qt_blk_res_used) - dqp->q_res_bcount -= (xfs_qcnt_t) - (qtrx->qt_blk_res - - qtrx->qt_blk_res_used); - else - dqp->q_res_bcount -= (xfs_qcnt_t) - (qtrx->qt_blk_res_used - - qtrx->qt_blk_res); - } - } else { - /* - * These blks were never reserved, either inside - * a transaction or outside one (in a delayed - * allocation). Also, this isn't always a - * negative number since we sometimes - * deliberately skip quota reservations. - */ - if (qtrx->qt_bcount_delta) { - dqp->q_res_bcount += - (xfs_qcnt_t)qtrx->qt_bcount_delta; - } - } - /* - * Adjust the RT reservation. - */ - if (qtrx->qt_rtblk_res != 0) { - if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) { - if (qtrx->qt_rtblk_res > - qtrx->qt_rtblk_res_used) - dqp->q_res_rtbcount -= (xfs_qcnt_t) - (qtrx->qt_rtblk_res - - qtrx->qt_rtblk_res_used); - else - dqp->q_res_rtbcount -= (xfs_qcnt_t) - (qtrx->qt_rtblk_res_used - - qtrx->qt_rtblk_res); - } - } else { - if (qtrx->qt_rtbcount_delta) - dqp->q_res_rtbcount += - (xfs_qcnt_t)qtrx->qt_rtbcount_delta; - } - - /* - * Adjust the inode reservation. - */ - if (qtrx->qt_ino_res != 0) { - ASSERT(qtrx->qt_ino_res >= - qtrx->qt_ino_res_used); - if (qtrx->qt_ino_res > qtrx->qt_ino_res_used) - dqp->q_res_icount -= (xfs_qcnt_t) - (qtrx->qt_ino_res - - qtrx->qt_ino_res_used); - } else { - if (qtrx->qt_icount_delta) - dqp->q_res_icount += - (xfs_qcnt_t)qtrx->qt_icount_delta; - } - - ASSERT(dqp->q_res_bcount >= - be64_to_cpu(dqp->q_core.d_bcount)); - ASSERT(dqp->q_res_icount >= - be64_to_cpu(dqp->q_core.d_icount)); - ASSERT(dqp->q_res_rtbcount >= - be64_to_cpu(dqp->q_core.d_rtbcount)); - } - /* - * Do the group quotas next - */ - qa = tp->t_dqinfo->dqa_grpdquots; - } -} - -/* - * Release the reservations, and adjust the dquots accordingly. - * This is called only when the transaction is being aborted. If by - * any chance we have done dquot modifications incore (ie. deltas) already, - * we simply throw those away, since that's the expected behavior - * when a transaction is curtailed without a commit. - */ -void -xfs_trans_unreserve_and_mod_dquots( - xfs_trans_t *tp) -{ - int i, j; - xfs_dquot_t *dqp; - xfs_dqtrx_t *qtrx, *qa; - boolean_t locked; - - if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) - return; - - qa = tp->t_dqinfo->dqa_usrdquots; - - for (j = 0; j < 2; j++) { - for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { - qtrx = &qa[i]; - /* - * We assume that the array of dquots is filled - * sequentially, not sparsely. - */ - if ((dqp = qtrx->qt_dquot) == NULL) - break; - /* - * Unreserve the original reservation. We don't care - * about the number of blocks used field, or deltas. - * Also we don't bother to zero the fields. - */ - locked = B_FALSE; - if (qtrx->qt_blk_res) { - xfs_dqlock(dqp); - locked = B_TRUE; - dqp->q_res_bcount -= - (xfs_qcnt_t)qtrx->qt_blk_res; - } - if (qtrx->qt_ino_res) { - if (!locked) { - xfs_dqlock(dqp); - locked = B_TRUE; - } - dqp->q_res_icount -= - (xfs_qcnt_t)qtrx->qt_ino_res; - } - - if (qtrx->qt_rtblk_res) { - if (!locked) { - xfs_dqlock(dqp); - locked = B_TRUE; - } - dqp->q_res_rtbcount -= - (xfs_qcnt_t)qtrx->qt_rtblk_res; - } - if (locked) - xfs_dqunlock(dqp); - - } - qa = tp->t_dqinfo->dqa_grpdquots; - } -} - -STATIC void -xfs_quota_warn( - struct xfs_mount *mp, - struct xfs_dquot *dqp, - int type) -{ - /* no warnings for project quotas - we just return ENOSPC later */ - if (dqp->dq_flags & XFS_DQ_PROJ) - return; - quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA, - be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev, - type); -} - -/* - * This reserves disk blocks and inodes against a dquot. - * Flags indicate if the dquot is to be locked here and also - * if the blk reservation is for RT or regular blocks. - * Sending in XFS_QMOPT_FORCE_RES flag skips the quota check. - */ -STATIC int -xfs_trans_dqresv( - xfs_trans_t *tp, - xfs_mount_t *mp, - xfs_dquot_t *dqp, - long nblks, - long ninos, - uint flags) -{ - xfs_qcnt_t hardlimit; - xfs_qcnt_t softlimit; - time_t timer; - xfs_qwarncnt_t warns; - xfs_qwarncnt_t warnlimit; - xfs_qcnt_t count; - xfs_qcnt_t *resbcountp; - xfs_quotainfo_t *q = mp->m_quotainfo; - - - xfs_dqlock(dqp); - - if (flags & XFS_TRANS_DQ_RES_BLKS) { - hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit); - if (!hardlimit) - hardlimit = q->qi_bhardlimit; - softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit); - if (!softlimit) - softlimit = q->qi_bsoftlimit; - timer = be32_to_cpu(dqp->q_core.d_btimer); - warns = be16_to_cpu(dqp->q_core.d_bwarns); - warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit; - resbcountp = &dqp->q_res_bcount; - } else { - ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS); - hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit); - if (!hardlimit) - hardlimit = q->qi_rtbhardlimit; - softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit); - if (!softlimit) - softlimit = q->qi_rtbsoftlimit; - timer = be32_to_cpu(dqp->q_core.d_rtbtimer); - warns = be16_to_cpu(dqp->q_core.d_rtbwarns); - warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit; - resbcountp = &dqp->q_res_rtbcount; - } - - if ((flags & XFS_QMOPT_FORCE_RES) == 0 && - dqp->q_core.d_id && - ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) || - (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && - (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { - if (nblks > 0) { - /* - * dquot is locked already. See if we'd go over the - * hardlimit or exceed the timelimit if we allocate - * nblks. - */ - if (hardlimit > 0ULL && - hardlimit <= nblks + *resbcountp) { - xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN); - goto error_return; - } - if (softlimit > 0ULL && - softlimit <= nblks + *resbcountp) { - if ((timer != 0 && get_seconds() > timer) || - (warns != 0 && warns >= warnlimit)) { - xfs_quota_warn(mp, dqp, - QUOTA_NL_BSOFTLONGWARN); - goto error_return; - } - - xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN); - } - } - if (ninos > 0) { - count = be64_to_cpu(dqp->q_core.d_icount); - timer = be32_to_cpu(dqp->q_core.d_itimer); - warns = be16_to_cpu(dqp->q_core.d_iwarns); - warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit; - hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit); - if (!hardlimit) - hardlimit = q->qi_ihardlimit; - softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit); - if (!softlimit) - softlimit = q->qi_isoftlimit; - - if (hardlimit > 0ULL && count >= hardlimit) { - xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN); - goto error_return; - } - if (softlimit > 0ULL && count >= softlimit) { - if ((timer != 0 && get_seconds() > timer) || - (warns != 0 && warns >= warnlimit)) { - xfs_quota_warn(mp, dqp, - QUOTA_NL_ISOFTLONGWARN); - goto error_return; - } - xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN); - } - } - } - - /* - * Change the reservation, but not the actual usage. - * Note that q_res_bcount = q_core.d_bcount + resv - */ - (*resbcountp) += (xfs_qcnt_t)nblks; - if (ninos != 0) - dqp->q_res_icount += (xfs_qcnt_t)ninos; - - /* - * note the reservation amt in the trans struct too, - * so that the transaction knows how much was reserved by - * it against this particular dquot. - * We don't do this when we are reserving for a delayed allocation, - * because we don't have the luxury of a transaction envelope then. - */ - if (tp) { - ASSERT(tp->t_dqinfo); - ASSERT(flags & XFS_QMOPT_RESBLK_MASK); - if (nblks != 0) - xfs_trans_mod_dquot(tp, dqp, - flags & XFS_QMOPT_RESBLK_MASK, - nblks); - if (ninos != 0) - xfs_trans_mod_dquot(tp, dqp, - XFS_TRANS_DQ_RES_INOS, - ninos); - } - ASSERT(dqp->q_res_bcount >= be64_to_cpu(dqp->q_core.d_bcount)); - ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount)); - ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount)); - - xfs_dqunlock(dqp); - return 0; - -error_return: - xfs_dqunlock(dqp); - if (flags & XFS_QMOPT_ENOSPC) - return ENOSPC; - return EDQUOT; -} - - -/* - * Given dquot(s), make disk block and/or inode reservations against them. - * The fact that this does the reservation against both the usr and - * grp/prj quotas is important, because this follows a both-or-nothing - * approach. - * - * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown. - * XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT. Used by pquota. - * XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks - * XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks - * dquots are unlocked on return, if they were not locked by caller. - */ -int -xfs_trans_reserve_quota_bydquots( - xfs_trans_t *tp, - xfs_mount_t *mp, - xfs_dquot_t *udqp, - xfs_dquot_t *gdqp, - long nblks, - long ninos, - uint flags) -{ - int resvd = 0, error; - - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) - return 0; - - if (tp && tp->t_dqinfo == NULL) - xfs_trans_alloc_dqinfo(tp); - - ASSERT(flags & XFS_QMOPT_RESBLK_MASK); - - if (udqp) { - error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos, - (flags & ~XFS_QMOPT_ENOSPC)); - if (error) - return error; - resvd = 1; - } - - if (gdqp) { - error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags); - if (error) { - /* - * can't do it, so backout previous reservation - */ - if (resvd) { - flags |= XFS_QMOPT_FORCE_RES; - xfs_trans_dqresv(tp, mp, udqp, - -nblks, -ninos, flags); - } - return error; - } - } - - /* - * Didn't change anything critical, so, no need to log - */ - return 0; -} - - -/* - * Lock the dquot and change the reservation if we can. - * This doesn't change the actual usage, just the reservation. - * The inode sent in is locked. - */ -int -xfs_trans_reserve_quota_nblks( - struct xfs_trans *tp, - struct xfs_inode *ip, - long nblks, - long ninos, - uint flags) -{ - struct xfs_mount *mp = ip->i_mount; - - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) - return 0; - if (XFS_IS_PQUOTA_ON(mp)) - flags |= XFS_QMOPT_ENOSPC; - - ASSERT(ip->i_ino != mp->m_sb.sb_uquotino); - ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == - XFS_TRANS_DQ_RES_RTBLKS || - (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == - XFS_TRANS_DQ_RES_BLKS); - - /* - * Reserve nblks against these dquots, with trans as the mediator. - */ - return xfs_trans_reserve_quota_bydquots(tp, mp, - ip->i_udquot, ip->i_gdquot, - nblks, ninos, flags); -} - -/* - * This routine is called to allocate a quotaoff log item. - */ -xfs_qoff_logitem_t * -xfs_trans_get_qoff_item( - xfs_trans_t *tp, - xfs_qoff_logitem_t *startqoff, - uint flags) -{ - xfs_qoff_logitem_t *q; - - ASSERT(tp != NULL); - - q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags); - ASSERT(q != NULL); - - /* - * Get a log_item_desc to point at the new item. - */ - xfs_trans_add_item(tp, &q->qql_item); - return q; -} - - -/* - * This is called to mark the quotaoff logitem as needing - * to be logged when the transaction is committed. The logitem must - * already be associated with the given transaction. - */ -void -xfs_trans_log_quotaoff_item( - xfs_trans_t *tp, - xfs_qoff_logitem_t *qlp) -{ - tp->t_flags |= XFS_TRANS_DIRTY; - qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY; -} - -STATIC void -xfs_trans_alloc_dqinfo( - xfs_trans_t *tp) -{ - tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP); -} - -void -xfs_trans_free_dqinfo( - xfs_trans_t *tp) -{ - if (!tp->t_dqinfo) - return; - kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo); - tp->t_dqinfo = NULL; -} diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c deleted file mode 100644 index b83f76b..0000000 --- a/fs/xfs/support/uuid.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include - -/* IRIX interpretation of an uuid_t */ -typedef struct { - __be32 uu_timelow; - __be16 uu_timemid; - __be16 uu_timehi; - __be16 uu_clockseq; - __be16 uu_node[3]; -} xfs_uu_t; - -/* - * uuid_getnodeuniq - obtain the node unique fields of a UUID. - * - * This is not in any way a standard or condoned UUID function; - * it just something that's needed for user-level file handles. - */ -void -uuid_getnodeuniq(uuid_t *uuid, int fsid [2]) -{ - xfs_uu_t *uup = (xfs_uu_t *)uuid; - - fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) | - be16_to_cpu(uup->uu_timemid); - fsid[1] = be32_to_cpu(uup->uu_timelow); -} - -int -uuid_is_nil(uuid_t *uuid) -{ - int i; - char *cp = (char *)uuid; - - if (uuid == NULL) - return 0; - /* implied check of version number here... */ - for (i = 0; i < sizeof *uuid; i++) - if (*cp++) return 0; /* not nil */ - return 1; /* is nil */ -} - -int -uuid_equal(uuid_t *uuid1, uuid_t *uuid2) -{ - return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1; -} diff --git a/fs/xfs/support/uuid.h b/fs/xfs/support/uuid.h deleted file mode 100644 index 4732d71..0000000 --- a/fs/xfs/support/uuid.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPPORT_UUID_H__ -#define __XFS_SUPPORT_UUID_H__ - -typedef struct { - unsigned char __u_bits[16]; -} uuid_t; - -extern int uuid_is_nil(uuid_t *uuid); -extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2); -extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]); - -#endif /* __XFS_SUPPORT_UUID_H__ */ diff --git a/fs/xfs/time.h b/fs/xfs/time.h new file mode 100644 index 0000000..387e695 --- /dev/null +++ b/fs/xfs/time.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_SUPPORT_TIME_H__ +#define __XFS_SUPPORT_TIME_H__ + +#include +#include + +typedef struct timespec timespec_t; + +static inline void delay(long ticks) +{ + schedule_timeout_uninterruptible(ticks); +} + +static inline void nanotime(struct timespec *tvp) +{ + *tvp = CURRENT_TIME; +} + +#endif /* __XFS_SUPPORT_TIME_H__ */ diff --git a/fs/xfs/uuid.c b/fs/xfs/uuid.c new file mode 100644 index 0000000..b83f76b --- /dev/null +++ b/fs/xfs/uuid.c @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include + +/* IRIX interpretation of an uuid_t */ +typedef struct { + __be32 uu_timelow; + __be16 uu_timemid; + __be16 uu_timehi; + __be16 uu_clockseq; + __be16 uu_node[3]; +} xfs_uu_t; + +/* + * uuid_getnodeuniq - obtain the node unique fields of a UUID. + * + * This is not in any way a standard or condoned UUID function; + * it just something that's needed for user-level file handles. + */ +void +uuid_getnodeuniq(uuid_t *uuid, int fsid [2]) +{ + xfs_uu_t *uup = (xfs_uu_t *)uuid; + + fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) | + be16_to_cpu(uup->uu_timemid); + fsid[1] = be32_to_cpu(uup->uu_timelow); +} + +int +uuid_is_nil(uuid_t *uuid) +{ + int i; + char *cp = (char *)uuid; + + if (uuid == NULL) + return 0; + /* implied check of version number here... */ + for (i = 0; i < sizeof *uuid; i++) + if (*cp++) return 0; /* not nil */ + return 1; /* is nil */ +} + +int +uuid_equal(uuid_t *uuid1, uuid_t *uuid2) +{ + return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1; +} diff --git a/fs/xfs/uuid.h b/fs/xfs/uuid.h new file mode 100644 index 0000000..4732d71 --- /dev/null +++ b/fs/xfs/uuid.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_SUPPORT_UUID_H__ +#define __XFS_SUPPORT_UUID_H__ + +typedef struct { + unsigned char __u_bits[16]; +} uuid_t; + +extern int uuid_is_nil(uuid_t *uuid); +extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2); +extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]); + +#endif /* __XFS_SUPPORT_UUID_H__ */ diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c new file mode 100644 index 0000000..b6c4b37 --- /dev/null +++ b/fs/xfs/xfs_acl.c @@ -0,0 +1,420 @@ +/* + * Copyright (c) 2008, Christoph Hellwig + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_acl.h" +#include "xfs_attr.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_vnodeops.h" +#include "xfs_trace.h" +#include +#include +#include + + +/* + * Locking scheme: + * - all ACL updates are protected by inode->i_mutex, which is taken before + * calling into this file. + */ + +STATIC struct posix_acl * +xfs_acl_from_disk(struct xfs_acl *aclp) +{ + struct posix_acl_entry *acl_e; + struct posix_acl *acl; + struct xfs_acl_entry *ace; + int count, i; + + count = be32_to_cpu(aclp->acl_cnt); + + acl = posix_acl_alloc(count, GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < count; i++) { + acl_e = &acl->a_entries[i]; + ace = &aclp->acl_entry[i]; + + /* + * The tag is 32 bits on disk and 16 bits in core. + * + * Because every access to it goes through the core + * format first this is not a problem. + */ + acl_e->e_tag = be32_to_cpu(ace->ae_tag); + acl_e->e_perm = be16_to_cpu(ace->ae_perm); + + switch (acl_e->e_tag) { + case ACL_USER: + case ACL_GROUP: + acl_e->e_id = be32_to_cpu(ace->ae_id); + break; + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + acl_e->e_id = ACL_UNDEFINED_ID; + break; + default: + goto fail; + } + } + return acl; + +fail: + posix_acl_release(acl); + return ERR_PTR(-EINVAL); +} + +STATIC void +xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl) +{ + const struct posix_acl_entry *acl_e; + struct xfs_acl_entry *ace; + int i; + + aclp->acl_cnt = cpu_to_be32(acl->a_count); + for (i = 0; i < acl->a_count; i++) { + ace = &aclp->acl_entry[i]; + acl_e = &acl->a_entries[i]; + + ace->ae_tag = cpu_to_be32(acl_e->e_tag); + ace->ae_id = cpu_to_be32(acl_e->e_id); + ace->ae_perm = cpu_to_be16(acl_e->e_perm); + } +} + +struct posix_acl * +xfs_get_acl(struct inode *inode, int type) +{ + struct xfs_inode *ip = XFS_I(inode); + struct posix_acl *acl; + struct xfs_acl *xfs_acl; + int len = sizeof(struct xfs_acl); + unsigned char *ea_name; + int error; + + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + + trace_xfs_get_acl(ip); + + switch (type) { + case ACL_TYPE_ACCESS: + ea_name = SGI_ACL_FILE; + break; + case ACL_TYPE_DEFAULT: + ea_name = SGI_ACL_DEFAULT; + break; + default: + BUG(); + } + + /* + * If we have a cached ACLs value just return it, not need to + * go out to the disk. + */ + + xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); + if (!xfs_acl) + return ERR_PTR(-ENOMEM); + + error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl, + &len, ATTR_ROOT); + if (error) { + /* + * If the attribute doesn't exist make sure we have a negative + * cache entry, for any other error assume it is transient and + * leave the cache entry as ACL_NOT_CACHED. + */ + if (error == -ENOATTR) { + acl = NULL; + goto out_update_cache; + } + goto out; + } + + acl = xfs_acl_from_disk(xfs_acl); + if (IS_ERR(acl)) + goto out; + + out_update_cache: + set_cached_acl(inode, type, acl); + out: + kfree(xfs_acl); + return acl; +} + +STATIC int +xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct xfs_inode *ip = XFS_I(inode); + unsigned char *ea_name; + int error; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + switch (type) { + case ACL_TYPE_ACCESS: + ea_name = SGI_ACL_FILE; + break; + case ACL_TYPE_DEFAULT: + if (!S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + ea_name = SGI_ACL_DEFAULT; + break; + default: + return -EINVAL; + } + + if (acl) { + struct xfs_acl *xfs_acl; + int len; + + xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); + if (!xfs_acl) + return -ENOMEM; + + xfs_acl_to_disk(xfs_acl, acl); + len = sizeof(struct xfs_acl) - + (sizeof(struct xfs_acl_entry) * + (XFS_ACL_MAX_ENTRIES - acl->a_count)); + + error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl, + len, ATTR_ROOT); + + kfree(xfs_acl); + } else { + /* + * A NULL ACL argument means we want to remove the ACL. + */ + error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT); + + /* + * If the attribute didn't exist to start with that's fine. + */ + if (error == -ENOATTR) + error = 0; + } + + if (!error) + set_cached_acl(inode, type, acl); + return error; +} + +static int +xfs_set_mode(struct inode *inode, umode_t mode) +{ + int error = 0; + + if (mode != inode->i_mode) { + struct iattr iattr; + + iattr.ia_valid = ATTR_MODE | ATTR_CTIME; + iattr.ia_mode = mode; + iattr.ia_ctime = current_fs_time(inode->i_sb); + + error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL); + } + + return error; +} + +static int +xfs_acl_exists(struct inode *inode, unsigned char *name) +{ + int len = sizeof(struct xfs_acl); + + return (xfs_attr_get(XFS_I(inode), name, NULL, &len, + ATTR_ROOT|ATTR_KERNOVAL) == 0); +} + +int +posix_acl_access_exists(struct inode *inode) +{ + return xfs_acl_exists(inode, SGI_ACL_FILE); +} + +int +posix_acl_default_exists(struct inode *inode) +{ + if (!S_ISDIR(inode->i_mode)) + return 0; + return xfs_acl_exists(inode, SGI_ACL_DEFAULT); +} + +/* + * No need for i_mutex because the inode is not yet exposed to the VFS. + */ +int +xfs_inherit_acl(struct inode *inode, struct posix_acl *acl) +{ + umode_t mode = inode->i_mode; + int error = 0, inherit = 0; + + if (S_ISDIR(inode->i_mode)) { + error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, acl); + if (error) + goto out; + } + + error = posix_acl_create(&acl, GFP_KERNEL, &mode); + if (error < 0) + return error; + + /* + * If posix_acl_create returns a positive value we need to + * inherit a permission that can't be represented using the Unix + * mode bits and we actually need to set an ACL. + */ + if (error > 0) + inherit = 1; + + error = xfs_set_mode(inode, mode); + if (error) + goto out; + + if (inherit) + error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl); + +out: + posix_acl_release(acl); + return error; +} + +int +xfs_acl_chmod(struct inode *inode) +{ + struct posix_acl *acl; + int error; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + + error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + if (error) + return error; + + error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl); + posix_acl_release(acl); + return error; +} + +static int +xfs_xattr_acl_get(struct dentry *dentry, const char *name, + void *value, size_t size, int type) +{ + struct posix_acl *acl; + int error; + + acl = xfs_get_acl(dentry->d_inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return -ENODATA; + + error = posix_acl_to_xattr(acl, value, size); + posix_acl_release(acl); + + return error; +} + +static int +xfs_xattr_acl_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl = NULL; + int error = 0; + + if (flags & XATTR_CREATE) + return -EINVAL; + if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) + return value ? -EACCES : 0; + if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + + if (!value) + goto set_acl; + + acl = posix_acl_from_xattr(value, size); + if (!acl) { + /* + * acl_set_file(3) may request that we set default ACLs with + * zero length -- defend (gracefully) against that here. + */ + goto out; + } + if (IS_ERR(acl)) { + error = PTR_ERR(acl); + goto out; + } + + error = posix_acl_valid(acl); + if (error) + goto out_release; + + error = -EINVAL; + if (acl->a_count > XFS_ACL_MAX_ENTRIES) + goto out_release; + + if (type == ACL_TYPE_ACCESS) { + umode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + + if (error <= 0) { + posix_acl_release(acl); + acl = NULL; + + if (error < 0) + return error; + } + + error = xfs_set_mode(inode, mode); + if (error) + goto out_release; + } + + set_acl: + error = xfs_set_acl(inode, type, acl); + out_release: + posix_acl_release(acl); + out: + return error; +} + +const struct xattr_handler xfs_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .flags = ACL_TYPE_ACCESS, + .get = xfs_xattr_acl_get, + .set = xfs_xattr_acl_set, +}; + +const struct xattr_handler xfs_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .flags = ACL_TYPE_DEFAULT, + .get = xfs_xattr_acl_get, + .set = xfs_xattr_acl_set, +}; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c new file mode 100644 index 0000000..63e971e --- /dev/null +++ b/fs/xfs/xfs_aops.c @@ -0,0 +1,1499 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_trans.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_alloc.h" +#include "xfs_error.h" +#include "xfs_rw.h" +#include "xfs_iomap.h" +#include "xfs_vnodeops.h" +#include "xfs_trace.h" +#include "xfs_bmap.h" +#include +#include +#include +#include + + +/* + * Prime number of hash buckets since address is used as the key. + */ +#define NVSYNC 37 +#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC]) +static wait_queue_head_t xfs_ioend_wq[NVSYNC]; + +void __init +xfs_ioend_init(void) +{ + int i; + + for (i = 0; i < NVSYNC; i++) + init_waitqueue_head(&xfs_ioend_wq[i]); +} + +void +xfs_ioend_wait( + xfs_inode_t *ip) +{ + wait_queue_head_t *wq = to_ioend_wq(ip); + + wait_event(*wq, (atomic_read(&ip->i_iocount) == 0)); +} + +STATIC void +xfs_ioend_wake( + xfs_inode_t *ip) +{ + if (atomic_dec_and_test(&ip->i_iocount)) + wake_up(to_ioend_wq(ip)); +} + +void +xfs_count_page_state( + struct page *page, + int *delalloc, + int *unwritten) +{ + struct buffer_head *bh, *head; + + *delalloc = *unwritten = 0; + + bh = head = page_buffers(page); + do { + if (buffer_unwritten(bh)) + (*unwritten) = 1; + else if (buffer_delay(bh)) + (*delalloc) = 1; + } while ((bh = bh->b_this_page) != head); +} + +STATIC struct block_device * +xfs_find_bdev_for_inode( + struct inode *inode) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + + if (XFS_IS_REALTIME_INODE(ip)) + return mp->m_rtdev_targp->bt_bdev; + else + return mp->m_ddev_targp->bt_bdev; +} + +/* + * We're now finished for good with this ioend structure. + * Update the page state via the associated buffer_heads, + * release holds on the inode and bio, and finally free + * up memory. Do not use the ioend after this. + */ +STATIC void +xfs_destroy_ioend( + xfs_ioend_t *ioend) +{ + struct buffer_head *bh, *next; + struct xfs_inode *ip = XFS_I(ioend->io_inode); + + for (bh = ioend->io_buffer_head; bh; bh = next) { + next = bh->b_private; + bh->b_end_io(bh, !ioend->io_error); + } + + /* + * Volume managers supporting multiple paths can send back ENODEV + * when the final path disappears. In this case continuing to fill + * the page cache with dirty data which cannot be written out is + * evil, so prevent that. + */ + if (unlikely(ioend->io_error == -ENODEV)) { + xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, + __FILE__, __LINE__); + } + + xfs_ioend_wake(ip); + mempool_free(ioend, xfs_ioend_pool); +} + +/* + * If the end of the current ioend is beyond the current EOF, + * return the new EOF value, otherwise zero. + */ +STATIC xfs_fsize_t +xfs_ioend_new_eof( + xfs_ioend_t *ioend) +{ + xfs_inode_t *ip = XFS_I(ioend->io_inode); + xfs_fsize_t isize; + xfs_fsize_t bsize; + + bsize = ioend->io_offset + ioend->io_size; + isize = MAX(ip->i_size, ip->i_new_size); + isize = MIN(isize, bsize); + return isize > ip->i_d.di_size ? isize : 0; +} + +/* + * Update on-disk file size now that data has been written to disk. The + * current in-memory file size is i_size. If a write is beyond eof i_new_size + * will be the intended file size until i_size is updated. If this write does + * not extend all the way to the valid file size then restrict this update to + * the end of the write. + * + * This function does not block as blocking on the inode lock in IO completion + * can lead to IO completion order dependency deadlocks.. If it can't get the + * inode ilock it will return EAGAIN. Callers must handle this. + */ +STATIC int +xfs_setfilesize( + xfs_ioend_t *ioend) +{ + xfs_inode_t *ip = XFS_I(ioend->io_inode); + xfs_fsize_t isize; + + if (unlikely(ioend->io_error)) + return 0; + + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) + return EAGAIN; + + isize = xfs_ioend_new_eof(ioend); + if (isize) { + trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); + ip->i_d.di_size = isize; + xfs_mark_inode_dirty(ip); + } + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return 0; +} + +/* + * Schedule IO completion handling on the final put of an ioend. + */ +STATIC void +xfs_finish_ioend( + struct xfs_ioend *ioend) +{ + if (atomic_dec_and_test(&ioend->io_remaining)) { + if (ioend->io_type == IO_UNWRITTEN) + queue_work(xfsconvertd_workqueue, &ioend->io_work); + else + queue_work(xfsdatad_workqueue, &ioend->io_work); + } +} + +/* + * IO write completion. + */ +STATIC void +xfs_end_io( + struct work_struct *work) +{ + xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); + struct xfs_inode *ip = XFS_I(ioend->io_inode); + int error = 0; + + /* + * For unwritten extents we need to issue transactions to convert a + * range to normal written extens after the data I/O has finished. + */ + if (ioend->io_type == IO_UNWRITTEN && + likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) { + + error = xfs_iomap_write_unwritten(ip, ioend->io_offset, + ioend->io_size); + if (error) + ioend->io_error = error; + } + + /* + * We might have to update the on-disk file size after extending + * writes. + */ + error = xfs_setfilesize(ioend); + ASSERT(!error || error == EAGAIN); + + /* + * If we didn't complete processing of the ioend, requeue it to the + * tail of the workqueue for another attempt later. Otherwise destroy + * it. + */ + if (error == EAGAIN) { + atomic_inc(&ioend->io_remaining); + xfs_finish_ioend(ioend); + /* ensure we don't spin on blocked ioends */ + delay(1); + } else { + if (ioend->io_iocb) + aio_complete(ioend->io_iocb, ioend->io_result, 0); + xfs_destroy_ioend(ioend); + } +} + +/* + * Call IO completion handling in caller context on the final put of an ioend. + */ +STATIC void +xfs_finish_ioend_sync( + struct xfs_ioend *ioend) +{ + if (atomic_dec_and_test(&ioend->io_remaining)) + xfs_end_io(&ioend->io_work); +} + +/* + * Allocate and initialise an IO completion structure. + * We need to track unwritten extent write completion here initially. + * We'll need to extend this for updating the ondisk inode size later + * (vs. incore size). + */ +STATIC xfs_ioend_t * +xfs_alloc_ioend( + struct inode *inode, + unsigned int type) +{ + xfs_ioend_t *ioend; + + ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS); + + /* + * Set the count to 1 initially, which will prevent an I/O + * completion callback from happening before we have started + * all the I/O from calling the completion routine too early. + */ + atomic_set(&ioend->io_remaining, 1); + ioend->io_error = 0; + ioend->io_list = NULL; + ioend->io_type = type; + ioend->io_inode = inode; + ioend->io_buffer_head = NULL; + ioend->io_buffer_tail = NULL; + atomic_inc(&XFS_I(ioend->io_inode)->i_iocount); + ioend->io_offset = 0; + ioend->io_size = 0; + ioend->io_iocb = NULL; + ioend->io_result = 0; + + INIT_WORK(&ioend->io_work, xfs_end_io); + return ioend; +} + +STATIC int +xfs_map_blocks( + struct inode *inode, + loff_t offset, + struct xfs_bmbt_irec *imap, + int type, + int nonblocking) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + ssize_t count = 1 << inode->i_blkbits; + xfs_fileoff_t offset_fsb, end_fsb; + int error = 0; + int bmapi_flags = XFS_BMAPI_ENTIRE; + int nimaps = 1; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -XFS_ERROR(EIO); + + if (type == IO_UNWRITTEN) + bmapi_flags |= XFS_BMAPI_IGSTATE; + + if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { + if (nonblocking) + return -XFS_ERROR(EAGAIN); + xfs_ilock(ip, XFS_ILOCK_SHARED); + } + + ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || + (ip->i_df.if_flags & XFS_IFEXTENTS)); + ASSERT(offset <= mp->m_maxioffset); + + if (offset + count > mp->m_maxioffset) + count = mp->m_maxioffset - offset; + end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); + offset_fsb = XFS_B_TO_FSBT(mp, offset); + error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb, + bmapi_flags, NULL, 0, imap, &nimaps, NULL); + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + if (error) + return -XFS_ERROR(error); + + if (type == IO_DELALLOC && + (!nimaps || isnullstartblock(imap->br_startblock))) { + error = xfs_iomap_write_allocate(ip, offset, count, imap); + if (!error) + trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); + return -XFS_ERROR(error); + } + +#ifdef DEBUG + if (type == IO_UNWRITTEN) { + ASSERT(nimaps); + ASSERT(imap->br_startblock != HOLESTARTBLOCK); + ASSERT(imap->br_startblock != DELAYSTARTBLOCK); + } +#endif + if (nimaps) + trace_xfs_map_blocks_found(ip, offset, count, type, imap); + return 0; +} + +STATIC int +xfs_imap_valid( + struct inode *inode, + struct xfs_bmbt_irec *imap, + xfs_off_t offset) +{ + offset >>= inode->i_blkbits; + + return offset >= imap->br_startoff && + offset < imap->br_startoff + imap->br_blockcount; +} + +/* + * BIO completion handler for buffered IO. + */ +STATIC void +xfs_end_bio( + struct bio *bio, + int error) +{ + xfs_ioend_t *ioend = bio->bi_private; + + ASSERT(atomic_read(&bio->bi_cnt) >= 1); + ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error; + + /* Toss bio and pass work off to an xfsdatad thread */ + bio->bi_private = NULL; + bio->bi_end_io = NULL; + bio_put(bio); + + xfs_finish_ioend(ioend); +} + +STATIC void +xfs_submit_ioend_bio( + struct writeback_control *wbc, + xfs_ioend_t *ioend, + struct bio *bio) +{ + atomic_inc(&ioend->io_remaining); + bio->bi_private = ioend; + bio->bi_end_io = xfs_end_bio; + + /* + * If the I/O is beyond EOF we mark the inode dirty immediately + * but don't update the inode size until I/O completion. + */ + if (xfs_ioend_new_eof(ioend)) + xfs_mark_inode_dirty(XFS_I(ioend->io_inode)); + + submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); +} + +STATIC struct bio * +xfs_alloc_ioend_bio( + struct buffer_head *bh) +{ + int nvecs = bio_get_nr_vecs(bh->b_bdev); + struct bio *bio = bio_alloc(GFP_NOIO, nvecs); + + ASSERT(bio->bi_private == NULL); + bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); + bio->bi_bdev = bh->b_bdev; + return bio; +} + +STATIC void +xfs_start_buffer_writeback( + struct buffer_head *bh) +{ + ASSERT(buffer_mapped(bh)); + ASSERT(buffer_locked(bh)); + ASSERT(!buffer_delay(bh)); + ASSERT(!buffer_unwritten(bh)); + + mark_buffer_async_write(bh); + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); +} + +STATIC void +xfs_start_page_writeback( + struct page *page, + int clear_dirty, + int buffers) +{ + ASSERT(PageLocked(page)); + ASSERT(!PageWriteback(page)); + if (clear_dirty) + clear_page_dirty_for_io(page); + set_page_writeback(page); + unlock_page(page); + /* If no buffers on the page are to be written, finish it here */ + if (!buffers) + end_page_writeback(page); +} + +static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh) +{ + return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); +} + +/* + * Submit all of the bios for all of the ioends we have saved up, covering the + * initial writepage page and also any probed pages. + * + * Because we may have multiple ioends spanning a page, we need to start + * writeback on all the buffers before we submit them for I/O. If we mark the + * buffers as we got, then we can end up with a page that only has buffers + * marked async write and I/O complete on can occur before we mark the other + * buffers async write. + * + * The end result of this is that we trip a bug in end_page_writeback() because + * we call it twice for the one page as the code in end_buffer_async_write() + * assumes that all buffers on the page are started at the same time. + * + * The fix is two passes across the ioend list - one to start writeback on the + * buffer_heads, and then submit them for I/O on the second pass. + */ +STATIC void +xfs_submit_ioend( + struct writeback_control *wbc, + xfs_ioend_t *ioend) +{ + xfs_ioend_t *head = ioend; + xfs_ioend_t *next; + struct buffer_head *bh; + struct bio *bio; + sector_t lastblock = 0; + + /* Pass 1 - start writeback */ + do { + next = ioend->io_list; + for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) + xfs_start_buffer_writeback(bh); + } while ((ioend = next) != NULL); + + /* Pass 2 - submit I/O */ + ioend = head; + do { + next = ioend->io_list; + bio = NULL; + + for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { + + if (!bio) { + retry: + bio = xfs_alloc_ioend_bio(bh); + } else if (bh->b_blocknr != lastblock + 1) { + xfs_submit_ioend_bio(wbc, ioend, bio); + goto retry; + } + + if (bio_add_buffer(bio, bh) != bh->b_size) { + xfs_submit_ioend_bio(wbc, ioend, bio); + goto retry; + } + + lastblock = bh->b_blocknr; + } + if (bio) + xfs_submit_ioend_bio(wbc, ioend, bio); + xfs_finish_ioend(ioend); + } while ((ioend = next) != NULL); +} + +/* + * Cancel submission of all buffer_heads so far in this endio. + * Toss the endio too. Only ever called for the initial page + * in a writepage request, so only ever one page. + */ +STATIC void +xfs_cancel_ioend( + xfs_ioend_t *ioend) +{ + xfs_ioend_t *next; + struct buffer_head *bh, *next_bh; + + do { + next = ioend->io_list; + bh = ioend->io_buffer_head; + do { + next_bh = bh->b_private; + clear_buffer_async_write(bh); + unlock_buffer(bh); + } while ((bh = next_bh) != NULL); + + xfs_ioend_wake(XFS_I(ioend->io_inode)); + mempool_free(ioend, xfs_ioend_pool); + } while ((ioend = next) != NULL); +} + +/* + * Test to see if we've been building up a completion structure for + * earlier buffers -- if so, we try to append to this ioend if we + * can, otherwise we finish off any current ioend and start another. + * Return true if we've finished the given ioend. + */ +STATIC void +xfs_add_to_ioend( + struct inode *inode, + struct buffer_head *bh, + xfs_off_t offset, + unsigned int type, + xfs_ioend_t **result, + int need_ioend) +{ + xfs_ioend_t *ioend = *result; + + if (!ioend || need_ioend || type != ioend->io_type) { + xfs_ioend_t *previous = *result; + + ioend = xfs_alloc_ioend(inode, type); + ioend->io_offset = offset; + ioend->io_buffer_head = bh; + ioend->io_buffer_tail = bh; + if (previous) + previous->io_list = ioend; + *result = ioend; + } else { + ioend->io_buffer_tail->b_private = bh; + ioend->io_buffer_tail = bh; + } + + bh->b_private = NULL; + ioend->io_size += bh->b_size; +} + +STATIC void +xfs_map_buffer( + struct inode *inode, + struct buffer_head *bh, + struct xfs_bmbt_irec *imap, + xfs_off_t offset) +{ + sector_t bn; + struct xfs_mount *m = XFS_I(inode)->i_mount; + xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff); + xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock); + + ASSERT(imap->br_startblock != HOLESTARTBLOCK); + ASSERT(imap->br_startblock != DELAYSTARTBLOCK); + + bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) + + ((offset - iomap_offset) >> inode->i_blkbits); + + ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode))); + + bh->b_blocknr = bn; + set_buffer_mapped(bh); +} + +STATIC void +xfs_map_at_offset( + struct inode *inode, + struct buffer_head *bh, + struct xfs_bmbt_irec *imap, + xfs_off_t offset) +{ + ASSERT(imap->br_startblock != HOLESTARTBLOCK); + ASSERT(imap->br_startblock != DELAYSTARTBLOCK); + + xfs_map_buffer(inode, bh, imap, offset); + set_buffer_mapped(bh); + clear_buffer_delay(bh); + clear_buffer_unwritten(bh); +} + +/* + * Test if a given page is suitable for writing as part of an unwritten + * or delayed allocate extent. + */ +STATIC int +xfs_is_delayed_page( + struct page *page, + unsigned int type) +{ + if (PageWriteback(page)) + return 0; + + if (page->mapping && page_has_buffers(page)) { + struct buffer_head *bh, *head; + int acceptable = 0; + + bh = head = page_buffers(page); + do { + if (buffer_unwritten(bh)) + acceptable = (type == IO_UNWRITTEN); + else if (buffer_delay(bh)) + acceptable = (type == IO_DELALLOC); + else if (buffer_dirty(bh) && buffer_mapped(bh)) + acceptable = (type == IO_OVERWRITE); + else + break; + } while ((bh = bh->b_this_page) != head); + + if (acceptable) + return 1; + } + + return 0; +} + +/* + * Allocate & map buffers for page given the extent map. Write it out. + * except for the original page of a writepage, this is called on + * delalloc/unwritten pages only, for the original page it is possible + * that the page has no mapping at all. + */ +STATIC int +xfs_convert_page( + struct inode *inode, + struct page *page, + loff_t tindex, + struct xfs_bmbt_irec *imap, + xfs_ioend_t **ioendp, + struct writeback_control *wbc) +{ + struct buffer_head *bh, *head; + xfs_off_t end_offset; + unsigned long p_offset; + unsigned int type; + int len, page_dirty; + int count = 0, done = 0, uptodate = 1; + xfs_off_t offset = page_offset(page); + + if (page->index != tindex) + goto fail; + if (!trylock_page(page)) + goto fail; + if (PageWriteback(page)) + goto fail_unlock_page; + if (page->mapping != inode->i_mapping) + goto fail_unlock_page; + if (!xfs_is_delayed_page(page, (*ioendp)->io_type)) + goto fail_unlock_page; + + /* + * page_dirty is initially a count of buffers on the page before + * EOF and is decremented as we move each into a cleanable state. + * + * Derivation: + * + * End offset is the highest offset that this page should represent. + * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1)) + * will evaluate non-zero and be less than PAGE_CACHE_SIZE and + * hence give us the correct page_dirty count. On any other page, + * it will be zero and in that case we need page_dirty to be the + * count of buffers on the page. + */ + end_offset = min_t(unsigned long long, + (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, + i_size_read(inode)); + + len = 1 << inode->i_blkbits; + p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1), + PAGE_CACHE_SIZE); + p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; + page_dirty = p_offset / len; + + bh = head = page_buffers(page); + do { + if (offset >= end_offset) + break; + if (!buffer_uptodate(bh)) + uptodate = 0; + if (!(PageUptodate(page) || buffer_uptodate(bh))) { + done = 1; + continue; + } + + if (buffer_unwritten(bh) || buffer_delay(bh) || + buffer_mapped(bh)) { + if (buffer_unwritten(bh)) + type = IO_UNWRITTEN; + else if (buffer_delay(bh)) + type = IO_DELALLOC; + else + type = IO_OVERWRITE; + + if (!xfs_imap_valid(inode, imap, offset)) { + done = 1; + continue; + } + + lock_buffer(bh); + if (type != IO_OVERWRITE) + xfs_map_at_offset(inode, bh, imap, offset); + xfs_add_to_ioend(inode, bh, offset, type, + ioendp, done); + + page_dirty--; + count++; + } else { + done = 1; + } + } while (offset += len, (bh = bh->b_this_page) != head); + + if (uptodate && bh == head) + SetPageUptodate(page); + + if (count) { + if (--wbc->nr_to_write <= 0 && + wbc->sync_mode == WB_SYNC_NONE) + done = 1; + } + xfs_start_page_writeback(page, !page_dirty, count); + + return done; + fail_unlock_page: + unlock_page(page); + fail: + return 1; +} + +/* + * Convert & write out a cluster of pages in the same extent as defined + * by mp and following the start page. + */ +STATIC void +xfs_cluster_write( + struct inode *inode, + pgoff_t tindex, + struct xfs_bmbt_irec *imap, + xfs_ioend_t **ioendp, + struct writeback_control *wbc, + pgoff_t tlast) +{ + struct pagevec pvec; + int done = 0, i; + + pagevec_init(&pvec, 0); + while (!done && tindex <= tlast) { + unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1); + + if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len)) + break; + + for (i = 0; i < pagevec_count(&pvec); i++) { + done = xfs_convert_page(inode, pvec.pages[i], tindex++, + imap, ioendp, wbc); + if (done) + break; + } + + pagevec_release(&pvec); + cond_resched(); + } +} + +STATIC void +xfs_vm_invalidatepage( + struct page *page, + unsigned long offset) +{ + trace_xfs_invalidatepage(page->mapping->host, page, offset); + block_invalidatepage(page, offset); +} + +/* + * If the page has delalloc buffers on it, we need to punch them out before we + * invalidate the page. If we don't, we leave a stale delalloc mapping on the + * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read + * is done on that same region - the delalloc extent is returned when none is + * supposed to be there. + * + * We prevent this by truncating away the delalloc regions on the page before + * invalidating it. Because they are delalloc, we can do this without needing a + * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this + * truncation without a transaction as there is no space left for block + * reservation (typically why we see a ENOSPC in writeback). + * + * This is not a performance critical path, so for now just do the punching a + * buffer head at a time. + */ +STATIC void +xfs_aops_discard_page( + struct page *page) +{ + struct inode *inode = page->mapping->host; + struct xfs_inode *ip = XFS_I(inode); + struct buffer_head *bh, *head; + loff_t offset = page_offset(page); + + if (!xfs_is_delayed_page(page, IO_DELALLOC)) + goto out_invalidate; + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + goto out_invalidate; + + xfs_alert(ip->i_mount, + "page discard on page %p, inode 0x%llx, offset %llu.", + page, ip->i_ino, offset); + + xfs_ilock(ip, XFS_ILOCK_EXCL); + bh = head = page_buffers(page); + do { + int error; + xfs_fileoff_t start_fsb; + + if (!buffer_delay(bh)) + goto next_buffer; + + start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); + error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1); + if (error) { + /* something screwed, just bail */ + if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { + xfs_alert(ip->i_mount, + "page discard unable to remove delalloc mapping."); + } + break; + } +next_buffer: + offset += 1 << inode->i_blkbits; + + } while ((bh = bh->b_this_page) != head); + + xfs_iunlock(ip, XFS_ILOCK_EXCL); +out_invalidate: + xfs_vm_invalidatepage(page, 0); + return; +} + +/* + * Write out a dirty page. + * + * For delalloc space on the page we need to allocate space and flush it. + * For unwritten space on the page we need to start the conversion to + * regular allocated space. + * For any other dirty buffer heads on the page we should flush them. + */ +STATIC int +xfs_vm_writepage( + struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct buffer_head *bh, *head; + struct xfs_bmbt_irec imap; + xfs_ioend_t *ioend = NULL, *iohead = NULL; + loff_t offset; + unsigned int type; + __uint64_t end_offset; + pgoff_t end_index, last_index; + ssize_t len; + int err, imap_valid = 0, uptodate = 1; + int count = 0; + int nonblocking = 0; + + trace_xfs_writepage(inode, page, 0); + + ASSERT(page_has_buffers(page)); + + /* + * Refuse to write the page out if we are called from reclaim context. + * + * This avoids stack overflows when called from deeply used stacks in + * random callers for direct reclaim or memcg reclaim. We explicitly + * allow reclaim from kswapd as the stack usage there is relatively low. + * + * This should really be done by the core VM, but until that happens + * filesystems like XFS, btrfs and ext4 have to take care of this + * by themselves. + */ + if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC) + goto redirty; + + /* + * Given that we do not allow direct reclaim to call us, we should + * never be called while in a filesystem transaction. + */ + if (WARN_ON(current->flags & PF_FSTRANS)) + goto redirty; + + /* Is this page beyond the end of the file? */ + offset = i_size_read(inode); + end_index = offset >> PAGE_CACHE_SHIFT; + last_index = (offset - 1) >> PAGE_CACHE_SHIFT; + if (page->index >= end_index) { + if ((page->index >= end_index + 1) || + !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { + unlock_page(page); + return 0; + } + } + + end_offset = min_t(unsigned long long, + (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, + offset); + len = 1 << inode->i_blkbits; + + bh = head = page_buffers(page); + offset = page_offset(page); + type = IO_OVERWRITE; + + if (wbc->sync_mode == WB_SYNC_NONE) + nonblocking = 1; + + do { + int new_ioend = 0; + + if (offset >= end_offset) + break; + if (!buffer_uptodate(bh)) + uptodate = 0; + + /* + * set_page_dirty dirties all buffers in a page, independent + * of their state. The dirty state however is entirely + * meaningless for holes (!mapped && uptodate), so skip + * buffers covering holes here. + */ + if (!buffer_mapped(bh) && buffer_uptodate(bh)) { + imap_valid = 0; + continue; + } + + if (buffer_unwritten(bh)) { + if (type != IO_UNWRITTEN) { + type = IO_UNWRITTEN; + imap_valid = 0; + } + } else if (buffer_delay(bh)) { + if (type != IO_DELALLOC) { + type = IO_DELALLOC; + imap_valid = 0; + } + } else if (buffer_uptodate(bh)) { + if (type != IO_OVERWRITE) { + type = IO_OVERWRITE; + imap_valid = 0; + } + } else { + if (PageUptodate(page)) { + ASSERT(buffer_mapped(bh)); + imap_valid = 0; + } + continue; + } + + if (imap_valid) + imap_valid = xfs_imap_valid(inode, &imap, offset); + if (!imap_valid) { + /* + * If we didn't have a valid mapping then we need to + * put the new mapping into a separate ioend structure. + * This ensures non-contiguous extents always have + * separate ioends, which is particularly important + * for unwritten extent conversion at I/O completion + * time. + */ + new_ioend = 1; + err = xfs_map_blocks(inode, offset, &imap, type, + nonblocking); + if (err) + goto error; + imap_valid = xfs_imap_valid(inode, &imap, offset); + } + if (imap_valid) { + lock_buffer(bh); + if (type != IO_OVERWRITE) + xfs_map_at_offset(inode, bh, &imap, offset); + xfs_add_to_ioend(inode, bh, offset, type, &ioend, + new_ioend); + count++; + } + + if (!iohead) + iohead = ioend; + + } while (offset += len, ((bh = bh->b_this_page) != head)); + + if (uptodate && bh == head) + SetPageUptodate(page); + + xfs_start_page_writeback(page, 1, count); + + if (ioend && imap_valid) { + xfs_off_t end_index; + + end_index = imap.br_startoff + imap.br_blockcount; + + /* to bytes */ + end_index <<= inode->i_blkbits; + + /* to pages */ + end_index = (end_index - 1) >> PAGE_CACHE_SHIFT; + + /* check against file size */ + if (end_index > last_index) + end_index = last_index; + + xfs_cluster_write(inode, page->index + 1, &imap, &ioend, + wbc, end_index); + } + + if (iohead) + xfs_submit_ioend(wbc, iohead); + + return 0; + +error: + if (iohead) + xfs_cancel_ioend(iohead); + + if (err == -EAGAIN) + goto redirty; + + xfs_aops_discard_page(page); + ClearPageUptodate(page); + unlock_page(page); + return err; + +redirty: + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; +} + +STATIC int +xfs_vm_writepages( + struct address_space *mapping, + struct writeback_control *wbc) +{ + xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); + return generic_writepages(mapping, wbc); +} + +/* + * Called to move a page into cleanable state - and from there + * to be released. The page should already be clean. We always + * have buffer heads in this call. + * + * Returns 1 if the page is ok to release, 0 otherwise. + */ +STATIC int +xfs_vm_releasepage( + struct page *page, + gfp_t gfp_mask) +{ + int delalloc, unwritten; + + trace_xfs_releasepage(page->mapping->host, page, 0); + + xfs_count_page_state(page, &delalloc, &unwritten); + + if (WARN_ON(delalloc)) + return 0; + if (WARN_ON(unwritten)) + return 0; + + return try_to_free_buffers(page); +} + +STATIC int +__xfs_get_blocks( + struct inode *inode, + sector_t iblock, + struct buffer_head *bh_result, + int create, + int direct) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + xfs_fileoff_t offset_fsb, end_fsb; + int error = 0; + int lockmode = 0; + struct xfs_bmbt_irec imap; + int nimaps = 1; + xfs_off_t offset; + ssize_t size; + int new = 0; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -XFS_ERROR(EIO); + + offset = (xfs_off_t)iblock << inode->i_blkbits; + ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); + size = bh_result->b_size; + + if (!create && direct && offset >= i_size_read(inode)) + return 0; + + if (create) { + lockmode = XFS_ILOCK_EXCL; + xfs_ilock(ip, lockmode); + } else { + lockmode = xfs_ilock_map_shared(ip); + } + + ASSERT(offset <= mp->m_maxioffset); + if (offset + size > mp->m_maxioffset) + size = mp->m_maxioffset - offset; + end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); + offset_fsb = XFS_B_TO_FSBT(mp, offset); + + error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb, + XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL); + if (error) + goto out_unlock; + + if (create && + (!nimaps || + (imap.br_startblock == HOLESTARTBLOCK || + imap.br_startblock == DELAYSTARTBLOCK))) { + if (direct) { + error = xfs_iomap_write_direct(ip, offset, size, + &imap, nimaps); + } else { + error = xfs_iomap_write_delay(ip, offset, size, &imap); + } + if (error) + goto out_unlock; + + trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap); + } else if (nimaps) { + trace_xfs_get_blocks_found(ip, offset, size, 0, &imap); + } else { + trace_xfs_get_blocks_notfound(ip, offset, size); + goto out_unlock; + } + xfs_iunlock(ip, lockmode); + + if (imap.br_startblock != HOLESTARTBLOCK && + imap.br_startblock != DELAYSTARTBLOCK) { + /* + * For unwritten extents do not report a disk address on + * the read case (treat as if we're reading into a hole). + */ + if (create || !ISUNWRITTEN(&imap)) + xfs_map_buffer(inode, bh_result, &imap, offset); + if (create && ISUNWRITTEN(&imap)) { + if (direct) + bh_result->b_private = inode; + set_buffer_unwritten(bh_result); + } + } + + /* + * If this is a realtime file, data may be on a different device. + * to that pointed to from the buffer_head b_bdev currently. + */ + bh_result->b_bdev = xfs_find_bdev_for_inode(inode); + + /* + * If we previously allocated a block out beyond eof and we are now + * coming back to use it then we will need to flag it as new even if it + * has a disk address. + * + * With sub-block writes into unwritten extents we also need to mark + * the buffer as new so that the unwritten parts of the buffer gets + * correctly zeroed. + */ + if (create && + ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || + (offset >= i_size_read(inode)) || + (new || ISUNWRITTEN(&imap)))) + set_buffer_new(bh_result); + + if (imap.br_startblock == DELAYSTARTBLOCK) { + BUG_ON(direct); + if (create) { + set_buffer_uptodate(bh_result); + set_buffer_mapped(bh_result); + set_buffer_delay(bh_result); + } + } + + /* + * If this is O_DIRECT or the mpage code calling tell them how large + * the mapping is, so that we can avoid repeated get_blocks calls. + */ + if (direct || size > (1 << inode->i_blkbits)) { + xfs_off_t mapping_size; + + mapping_size = imap.br_startoff + imap.br_blockcount - iblock; + mapping_size <<= inode->i_blkbits; + + ASSERT(mapping_size > 0); + if (mapping_size > size) + mapping_size = size; + if (mapping_size > LONG_MAX) + mapping_size = LONG_MAX; + + bh_result->b_size = mapping_size; + } + + return 0; + +out_unlock: + xfs_iunlock(ip, lockmode); + return -error; +} + +int +xfs_get_blocks( + struct inode *inode, + sector_t iblock, + struct buffer_head *bh_result, + int create) +{ + return __xfs_get_blocks(inode, iblock, bh_result, create, 0); +} + +STATIC int +xfs_get_blocks_direct( + struct inode *inode, + sector_t iblock, + struct buffer_head *bh_result, + int create) +{ + return __xfs_get_blocks(inode, iblock, bh_result, create, 1); +} + +/* + * Complete a direct I/O write request. + * + * If the private argument is non-NULL __xfs_get_blocks signals us that we + * need to issue a transaction to convert the range from unwritten to written + * extents. In case this is regular synchronous I/O we just call xfs_end_io + * to do this and we are done. But in case this was a successful AIO + * request this handler is called from interrupt context, from which we + * can't start transactions. In that case offload the I/O completion to + * the workqueues we also use for buffered I/O completion. + */ +STATIC void +xfs_end_io_direct_write( + struct kiocb *iocb, + loff_t offset, + ssize_t size, + void *private, + int ret, + bool is_async) +{ + struct xfs_ioend *ioend = iocb->private; + + /* + * blockdev_direct_IO can return an error even after the I/O + * completion handler was called. Thus we need to protect + * against double-freeing. + */ + iocb->private = NULL; + + ioend->io_offset = offset; + ioend->io_size = size; + if (private && size > 0) + ioend->io_type = IO_UNWRITTEN; + + if (is_async) { + /* + * If we are converting an unwritten extent we need to delay + * the AIO completion until after the unwrittent extent + * conversion has completed, otherwise do it ASAP. + */ + if (ioend->io_type == IO_UNWRITTEN) { + ioend->io_iocb = iocb; + ioend->io_result = ret; + } else { + aio_complete(iocb, ret, 0); + } + xfs_finish_ioend(ioend); + } else { + xfs_finish_ioend_sync(ioend); + } + + /* XXX: probably should move into the real I/O completion handler */ + inode_dio_done(ioend->io_inode); +} + +STATIC ssize_t +xfs_vm_direct_IO( + int rw, + struct kiocb *iocb, + const struct iovec *iov, + loff_t offset, + unsigned long nr_segs) +{ + struct inode *inode = iocb->ki_filp->f_mapping->host; + struct block_device *bdev = xfs_find_bdev_for_inode(inode); + ssize_t ret; + + if (rw & WRITE) { + iocb->private = xfs_alloc_ioend(inode, IO_DIRECT); + + ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, + offset, nr_segs, + xfs_get_blocks_direct, + xfs_end_io_direct_write, NULL, 0); + if (ret != -EIOCBQUEUED && iocb->private) + xfs_destroy_ioend(iocb->private); + } else { + ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, + offset, nr_segs, + xfs_get_blocks_direct, + NULL, NULL, 0); + } + + return ret; +} + +STATIC void +xfs_vm_write_failed( + struct address_space *mapping, + loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + /* + * punch out the delalloc blocks we have already allocated. We + * don't call xfs_setattr() to do this as we may be in the + * middle of a multi-iovec write and so the vfs inode->i_size + * will not match the xfs ip->i_size and so it will zero too + * much. Hence we jus truncate the page cache to zero what is + * necessary and punch the delalloc blocks directly. + */ + struct xfs_inode *ip = XFS_I(inode); + xfs_fileoff_t start_fsb; + xfs_fileoff_t end_fsb; + int error; + + truncate_pagecache(inode, to, inode->i_size); + + /* + * Check if there are any blocks that are outside of i_size + * that need to be trimmed back. + */ + start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1; + end_fsb = XFS_B_TO_FSB(ip->i_mount, to); + if (end_fsb <= start_fsb) + return; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + error = xfs_bmap_punch_delalloc_range(ip, start_fsb, + end_fsb - start_fsb); + if (error) { + /* something screwed, just bail */ + if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { + xfs_alert(ip->i_mount, + "xfs_vm_write_failed: unable to clean up ino %lld", + ip->i_ino); + } + } + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } +} + +STATIC int +xfs_vm_write_begin( + struct file *file, + struct address_space *mapping, + loff_t pos, + unsigned len, + unsigned flags, + struct page **pagep, + void **fsdata) +{ + int ret; + + ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS, + pagep, xfs_get_blocks); + if (unlikely(ret)) + xfs_vm_write_failed(mapping, pos + len); + return ret; +} + +STATIC int +xfs_vm_write_end( + struct file *file, + struct address_space *mapping, + loff_t pos, + unsigned len, + unsigned copied, + struct page *page, + void *fsdata) +{ + int ret; + + ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); + if (unlikely(ret < len)) + xfs_vm_write_failed(mapping, pos + len); + return ret; +} + +STATIC sector_t +xfs_vm_bmap( + struct address_space *mapping, + sector_t block) +{ + struct inode *inode = (struct inode *)mapping->host; + struct xfs_inode *ip = XFS_I(inode); + + trace_xfs_vm_bmap(XFS_I(inode)); + xfs_ilock(ip, XFS_IOLOCK_SHARED); + xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + return generic_block_bmap(mapping, block, xfs_get_blocks); +} + +STATIC int +xfs_vm_readpage( + struct file *unused, + struct page *page) +{ + return mpage_readpage(page, xfs_get_blocks); +} + +STATIC int +xfs_vm_readpages( + struct file *unused, + struct address_space *mapping, + struct list_head *pages, + unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); +} + +const struct address_space_operations xfs_address_space_operations = { + .readpage = xfs_vm_readpage, + .readpages = xfs_vm_readpages, + .writepage = xfs_vm_writepage, + .writepages = xfs_vm_writepages, + .releasepage = xfs_vm_releasepage, + .invalidatepage = xfs_vm_invalidatepage, + .write_begin = xfs_vm_write_begin, + .write_end = xfs_vm_write_end, + .bmap = xfs_vm_bmap, + .direct_IO = xfs_vm_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, +}; diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h new file mode 100644 index 0000000..71f721e --- /dev/null +++ b/fs/xfs/xfs_aops.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2005-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_AOPS_H__ +#define __XFS_AOPS_H__ + +extern struct workqueue_struct *xfsdatad_workqueue; +extern struct workqueue_struct *xfsconvertd_workqueue; +extern mempool_t *xfs_ioend_pool; + +/* + * Types of I/O for bmap clustering and I/O completion tracking. + */ +enum { + IO_DIRECT = 0, /* special case for direct I/O ioends */ + IO_DELALLOC, /* mapping covers delalloc region */ + IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */ + IO_OVERWRITE, /* mapping covers already allocated extent */ +}; + +#define XFS_IO_TYPES \ + { 0, "" }, \ + { IO_DELALLOC, "delalloc" }, \ + { IO_UNWRITTEN, "unwritten" }, \ + { IO_OVERWRITE, "overwrite" } + +/* + * xfs_ioend struct manages large extent writes for XFS. + * It can manage several multi-page bio's at once. + */ +typedef struct xfs_ioend { + struct xfs_ioend *io_list; /* next ioend in chain */ + unsigned int io_type; /* delalloc / unwritten */ + int io_error; /* I/O error code */ + atomic_t io_remaining; /* hold count */ + struct inode *io_inode; /* file being written to */ + struct buffer_head *io_buffer_head;/* buffer linked list head */ + struct buffer_head *io_buffer_tail;/* buffer linked list tail */ + size_t io_size; /* size of the extent */ + xfs_off_t io_offset; /* offset in the file */ + struct work_struct io_work; /* xfsdatad work queue */ + struct kiocb *io_iocb; + int io_result; +} xfs_ioend_t; + +extern const struct address_space_operations xfs_address_space_operations; +extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); + +extern void xfs_ioend_init(void); +extern void xfs_ioend_wait(struct xfs_inode *); + +extern void xfs_count_page_state(struct page *, int *, int *); + +#endif /* __XFS_AOPS_H__ */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c new file mode 100644 index 0000000..c57836d --- /dev/null +++ b/fs/xfs/xfs_buf.c @@ -0,0 +1,1876 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "xfs_sb.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_trace.h" + +static kmem_zone_t *xfs_buf_zone; +STATIC int xfsbufd(void *); +STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); + +static struct workqueue_struct *xfslogd_workqueue; +struct workqueue_struct *xfsdatad_workqueue; +struct workqueue_struct *xfsconvertd_workqueue; + +#ifdef XFS_BUF_LOCK_TRACKING +# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) +# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1) +# define XB_GET_OWNER(bp) ((bp)->b_last_holder) +#else +# define XB_SET_OWNER(bp) do { } while (0) +# define XB_CLEAR_OWNER(bp) do { } while (0) +# define XB_GET_OWNER(bp) do { } while (0) +#endif + +#define xb_to_gfp(flags) \ + ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \ + ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN) + +#define xb_to_km(flags) \ + (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) + +#define xfs_buf_allocate(flags) \ + kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags)) +#define xfs_buf_deallocate(bp) \ + kmem_zone_free(xfs_buf_zone, (bp)); + +static inline int +xfs_buf_is_vmapped( + struct xfs_buf *bp) +{ + /* + * Return true if the buffer is vmapped. + * + * The XBF_MAPPED flag is set if the buffer should be mapped, but the + * code is clever enough to know it doesn't have to map a single page, + * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1. + */ + return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1; +} + +static inline int +xfs_buf_vmap_len( + struct xfs_buf *bp) +{ + return (bp->b_page_count * PAGE_SIZE) - bp->b_offset; +} + +/* + * xfs_buf_lru_add - add a buffer to the LRU. + * + * The LRU takes a new reference to the buffer so that it will only be freed + * once the shrinker takes the buffer off the LRU. + */ +STATIC void +xfs_buf_lru_add( + struct xfs_buf *bp) +{ + struct xfs_buftarg *btp = bp->b_target; + + spin_lock(&btp->bt_lru_lock); + if (list_empty(&bp->b_lru)) { + atomic_inc(&bp->b_hold); + list_add_tail(&bp->b_lru, &btp->bt_lru); + btp->bt_lru_nr++; + } + spin_unlock(&btp->bt_lru_lock); +} + +/* + * xfs_buf_lru_del - remove a buffer from the LRU + * + * The unlocked check is safe here because it only occurs when there are not + * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there + * to optimise the shrinker removing the buffer from the LRU and calling + * xfs_buf_free(). i.e. it removes an unnecessary round trip on the + * bt_lru_lock. + */ +STATIC void +xfs_buf_lru_del( + struct xfs_buf *bp) +{ + struct xfs_buftarg *btp = bp->b_target; + + if (list_empty(&bp->b_lru)) + return; + + spin_lock(&btp->bt_lru_lock); + if (!list_empty(&bp->b_lru)) { + list_del_init(&bp->b_lru); + btp->bt_lru_nr--; + } + spin_unlock(&btp->bt_lru_lock); +} + +/* + * When we mark a buffer stale, we remove the buffer from the LRU and clear the + * b_lru_ref count so that the buffer is freed immediately when the buffer + * reference count falls to zero. If the buffer is already on the LRU, we need + * to remove the reference that LRU holds on the buffer. + * + * This prevents build-up of stale buffers on the LRU. + */ +void +xfs_buf_stale( + struct xfs_buf *bp) +{ + bp->b_flags |= XBF_STALE; + atomic_set(&(bp)->b_lru_ref, 0); + if (!list_empty(&bp->b_lru)) { + struct xfs_buftarg *btp = bp->b_target; + + spin_lock(&btp->bt_lru_lock); + if (!list_empty(&bp->b_lru)) { + list_del_init(&bp->b_lru); + btp->bt_lru_nr--; + atomic_dec(&bp->b_hold); + } + spin_unlock(&btp->bt_lru_lock); + } + ASSERT(atomic_read(&bp->b_hold) >= 1); +} + +STATIC void +_xfs_buf_initialize( + xfs_buf_t *bp, + xfs_buftarg_t *target, + xfs_off_t range_base, + size_t range_length, + xfs_buf_flags_t flags) +{ + /* + * We don't want certain flags to appear in b_flags. + */ + flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD); + + memset(bp, 0, sizeof(xfs_buf_t)); + atomic_set(&bp->b_hold, 1); + atomic_set(&bp->b_lru_ref, 1); + init_completion(&bp->b_iowait); + INIT_LIST_HEAD(&bp->b_lru); + INIT_LIST_HEAD(&bp->b_list); + RB_CLEAR_NODE(&bp->b_rbnode); + sema_init(&bp->b_sema, 0); /* held, no waiters */ + XB_SET_OWNER(bp); + bp->b_target = target; + bp->b_file_offset = range_base; + /* + * Set buffer_length and count_desired to the same value initially. + * I/O routines should use count_desired, which will be the same in + * most cases but may be reset (e.g. XFS recovery). + */ + bp->b_buffer_length = bp->b_count_desired = range_length; + bp->b_flags = flags; + bp->b_bn = XFS_BUF_DADDR_NULL; + atomic_set(&bp->b_pin_count, 0); + init_waitqueue_head(&bp->b_waiters); + + XFS_STATS_INC(xb_create); + + trace_xfs_buf_init(bp, _RET_IP_); +} + +/* + * Allocate a page array capable of holding a specified number + * of pages, and point the page buf at it. + */ +STATIC int +_xfs_buf_get_pages( + xfs_buf_t *bp, + int page_count, + xfs_buf_flags_t flags) +{ + /* Make sure that we have a page list */ + if (bp->b_pages == NULL) { + bp->b_offset = xfs_buf_poff(bp->b_file_offset); + bp->b_page_count = page_count; + if (page_count <= XB_PAGES) { + bp->b_pages = bp->b_page_array; + } else { + bp->b_pages = kmem_alloc(sizeof(struct page *) * + page_count, xb_to_km(flags)); + if (bp->b_pages == NULL) + return -ENOMEM; + } + memset(bp->b_pages, 0, sizeof(struct page *) * page_count); + } + return 0; +} + +/* + * Frees b_pages if it was allocated. + */ +STATIC void +_xfs_buf_free_pages( + xfs_buf_t *bp) +{ + if (bp->b_pages != bp->b_page_array) { + kmem_free(bp->b_pages); + bp->b_pages = NULL; + } +} + +/* + * Releases the specified buffer. + * + * The modification state of any associated pages is left unchanged. + * The buffer most not be on any hash - use xfs_buf_rele instead for + * hashed and refcounted buffers + */ +void +xfs_buf_free( + xfs_buf_t *bp) +{ + trace_xfs_buf_free(bp, _RET_IP_); + + ASSERT(list_empty(&bp->b_lru)); + + if (bp->b_flags & _XBF_PAGES) { + uint i; + + if (xfs_buf_is_vmapped(bp)) + vm_unmap_ram(bp->b_addr - bp->b_offset, + bp->b_page_count); + + for (i = 0; i < bp->b_page_count; i++) { + struct page *page = bp->b_pages[i]; + + __free_page(page); + } + } else if (bp->b_flags & _XBF_KMEM) + kmem_free(bp->b_addr); + _xfs_buf_free_pages(bp); + xfs_buf_deallocate(bp); +} + +/* + * Allocates all the pages for buffer in question and builds it's page list. + */ +STATIC int +xfs_buf_allocate_memory( + xfs_buf_t *bp, + uint flags) +{ + size_t size = bp->b_count_desired; + size_t nbytes, offset; + gfp_t gfp_mask = xb_to_gfp(flags); + unsigned short page_count, i; + xfs_off_t end; + int error; + + /* + * for buffers that are contained within a single page, just allocate + * the memory from the heap - there's no need for the complexity of + * page arrays to keep allocation down to order 0. + */ + if (bp->b_buffer_length < PAGE_SIZE) { + bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags)); + if (!bp->b_addr) { + /* low memory - use alloc_page loop instead */ + goto use_alloc_page; + } + + if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) & + PAGE_MASK) != + ((unsigned long)bp->b_addr & PAGE_MASK)) { + /* b_addr spans two pages - use alloc_page instead */ + kmem_free(bp->b_addr); + bp->b_addr = NULL; + goto use_alloc_page; + } + bp->b_offset = offset_in_page(bp->b_addr); + bp->b_pages = bp->b_page_array; + bp->b_pages[0] = virt_to_page(bp->b_addr); + bp->b_page_count = 1; + bp->b_flags |= XBF_MAPPED | _XBF_KMEM; + return 0; + } + +use_alloc_page: + end = bp->b_file_offset + bp->b_buffer_length; + page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset); + error = _xfs_buf_get_pages(bp, page_count, flags); + if (unlikely(error)) + return error; + + offset = bp->b_offset; + bp->b_flags |= _XBF_PAGES; + + for (i = 0; i < bp->b_page_count; i++) { + struct page *page; + uint retries = 0; +retry: + page = alloc_page(gfp_mask); + if (unlikely(page == NULL)) { + if (flags & XBF_READ_AHEAD) { + bp->b_page_count = i; + error = ENOMEM; + goto out_free_pages; + } + + /* + * This could deadlock. + * + * But until all the XFS lowlevel code is revamped to + * handle buffer allocation failures we can't do much. + */ + if (!(++retries % 100)) + xfs_err(NULL, + "possible memory allocation deadlock in %s (mode:0x%x)", + __func__, gfp_mask); + + XFS_STATS_INC(xb_page_retries); + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } + + XFS_STATS_INC(xb_page_found); + + nbytes = min_t(size_t, size, PAGE_SIZE - offset); + size -= nbytes; + bp->b_pages[i] = page; + offset = 0; + } + return 0; + +out_free_pages: + for (i = 0; i < bp->b_page_count; i++) + __free_page(bp->b_pages[i]); + return error; +} + +/* + * Map buffer into kernel address-space if necessary. + */ +STATIC int +_xfs_buf_map_pages( + xfs_buf_t *bp, + uint flags) +{ + ASSERT(bp->b_flags & _XBF_PAGES); + if (bp->b_page_count == 1) { + /* A single page buffer is always mappable */ + bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; + bp->b_flags |= XBF_MAPPED; + } else if (flags & XBF_MAPPED) { + int retried = 0; + + do { + bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, + -1, PAGE_KERNEL); + if (bp->b_addr) + break; + vm_unmap_aliases(); + } while (retried++ <= 1); + + if (!bp->b_addr) + return -ENOMEM; + bp->b_addr += bp->b_offset; + bp->b_flags |= XBF_MAPPED; + } + + return 0; +} + +/* + * Finding and Reading Buffers + */ + +/* + * Look up, and creates if absent, a lockable buffer for + * a given range of an inode. The buffer is returned + * locked. If other overlapping buffers exist, they are + * released before the new buffer is created and locked, + * which may imply that this call will block until those buffers + * are unlocked. No I/O is implied by this call. + */ +xfs_buf_t * +_xfs_buf_find( + xfs_buftarg_t *btp, /* block device target */ + xfs_off_t ioff, /* starting offset of range */ + size_t isize, /* length of range */ + xfs_buf_flags_t flags, + xfs_buf_t *new_bp) +{ + xfs_off_t range_base; + size_t range_length; + struct xfs_perag *pag; + struct rb_node **rbp; + struct rb_node *parent; + xfs_buf_t *bp; + + range_base = (ioff << BBSHIFT); + range_length = (isize << BBSHIFT); + + /* Check for IOs smaller than the sector size / not sector aligned */ + ASSERT(!(range_length < (1 << btp->bt_sshift))); + ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); + + /* get tree root */ + pag = xfs_perag_get(btp->bt_mount, + xfs_daddr_to_agno(btp->bt_mount, ioff)); + + /* walk tree */ + spin_lock(&pag->pag_buf_lock); + rbp = &pag->pag_buf_tree.rb_node; + parent = NULL; + bp = NULL; + while (*rbp) { + parent = *rbp; + bp = rb_entry(parent, struct xfs_buf, b_rbnode); + + if (range_base < bp->b_file_offset) + rbp = &(*rbp)->rb_left; + else if (range_base > bp->b_file_offset) + rbp = &(*rbp)->rb_right; + else { + /* + * found a block offset match. If the range doesn't + * match, the only way this is allowed is if the buffer + * in the cache is stale and the transaction that made + * it stale has not yet committed. i.e. we are + * reallocating a busy extent. Skip this buffer and + * continue searching to the right for an exact match. + */ + if (bp->b_buffer_length != range_length) { + ASSERT(bp->b_flags & XBF_STALE); + rbp = &(*rbp)->rb_right; + continue; + } + atomic_inc(&bp->b_hold); + goto found; + } + } + + /* No match found */ + if (new_bp) { + _xfs_buf_initialize(new_bp, btp, range_base, + range_length, flags); + rb_link_node(&new_bp->b_rbnode, parent, rbp); + rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree); + /* the buffer keeps the perag reference until it is freed */ + new_bp->b_pag = pag; + spin_unlock(&pag->pag_buf_lock); + } else { + XFS_STATS_INC(xb_miss_locked); + spin_unlock(&pag->pag_buf_lock); + xfs_perag_put(pag); + } + return new_bp; + +found: + spin_unlock(&pag->pag_buf_lock); + xfs_perag_put(pag); + + if (!xfs_buf_trylock(bp)) { + if (flags & XBF_TRYLOCK) { + xfs_buf_rele(bp); + XFS_STATS_INC(xb_busy_locked); + return NULL; + } + xfs_buf_lock(bp); + XFS_STATS_INC(xb_get_locked_waited); + } + + /* + * if the buffer is stale, clear all the external state associated with + * it. We need to keep flags such as how we allocated the buffer memory + * intact here. + */ + if (bp->b_flags & XBF_STALE) { + ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); + bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES; + } + + trace_xfs_buf_find(bp, flags, _RET_IP_); + XFS_STATS_INC(xb_get_locked); + return bp; +} + +/* + * Assembles a buffer covering the specified range. + * Storage in memory for all portions of the buffer will be allocated, + * although backing storage may not be. + */ +xfs_buf_t * +xfs_buf_get( + xfs_buftarg_t *target,/* target for buffer */ + xfs_off_t ioff, /* starting offset of range */ + size_t isize, /* length of range */ + xfs_buf_flags_t flags) +{ + xfs_buf_t *bp, *new_bp; + int error = 0; + + new_bp = xfs_buf_allocate(flags); + if (unlikely(!new_bp)) + return NULL; + + bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); + if (bp == new_bp) { + error = xfs_buf_allocate_memory(bp, flags); + if (error) + goto no_buffer; + } else { + xfs_buf_deallocate(new_bp); + if (unlikely(bp == NULL)) + return NULL; + } + + if (!(bp->b_flags & XBF_MAPPED)) { + error = _xfs_buf_map_pages(bp, flags); + if (unlikely(error)) { + xfs_warn(target->bt_mount, + "%s: failed to map pages\n", __func__); + goto no_buffer; + } + } + + XFS_STATS_INC(xb_get); + + /* + * Always fill in the block number now, the mapped cases can do + * their own overlay of this later. + */ + bp->b_bn = ioff; + bp->b_count_desired = bp->b_buffer_length; + + trace_xfs_buf_get(bp, flags, _RET_IP_); + return bp; + + no_buffer: + if (flags & (XBF_LOCK | XBF_TRYLOCK)) + xfs_buf_unlock(bp); + xfs_buf_rele(bp); + return NULL; +} + +STATIC int +_xfs_buf_read( + xfs_buf_t *bp, + xfs_buf_flags_t flags) +{ + int status; + + ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); + ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); + + bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD); + bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); + + status = xfs_buf_iorequest(bp); + if (status || bp->b_error || (flags & XBF_ASYNC)) + return status; + return xfs_buf_iowait(bp); +} + +xfs_buf_t * +xfs_buf_read( + xfs_buftarg_t *target, + xfs_off_t ioff, + size_t isize, + xfs_buf_flags_t flags) +{ + xfs_buf_t *bp; + + flags |= XBF_READ; + + bp = xfs_buf_get(target, ioff, isize, flags); + if (bp) { + trace_xfs_buf_read(bp, flags, _RET_IP_); + + if (!XFS_BUF_ISDONE(bp)) { + XFS_STATS_INC(xb_get_read); + _xfs_buf_read(bp, flags); + } else if (flags & XBF_ASYNC) { + /* + * Read ahead call which is already satisfied, + * drop the buffer + */ + goto no_buffer; + } else { + /* We do not want read in the flags */ + bp->b_flags &= ~XBF_READ; + } + } + + return bp; + + no_buffer: + if (flags & (XBF_LOCK | XBF_TRYLOCK)) + xfs_buf_unlock(bp); + xfs_buf_rele(bp); + return NULL; +} + +/* + * If we are not low on memory then do the readahead in a deadlock + * safe manner. + */ +void +xfs_buf_readahead( + xfs_buftarg_t *target, + xfs_off_t ioff, + size_t isize) +{ + if (bdi_read_congested(target->bt_bdi)) + return; + + xfs_buf_read(target, ioff, isize, + XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK); +} + +/* + * Read an uncached buffer from disk. Allocates and returns a locked + * buffer containing the disk contents or nothing. + */ +struct xfs_buf * +xfs_buf_read_uncached( + struct xfs_mount *mp, + struct xfs_buftarg *target, + xfs_daddr_t daddr, + size_t length, + int flags) +{ + xfs_buf_t *bp; + int error; + + bp = xfs_buf_get_uncached(target, length, flags); + if (!bp) + return NULL; + + /* set up the buffer for a read IO */ + XFS_BUF_SET_ADDR(bp, daddr); + XFS_BUF_READ(bp); + + xfsbdstrat(mp, bp); + error = xfs_buf_iowait(bp); + if (error || bp->b_error) { + xfs_buf_relse(bp); + return NULL; + } + return bp; +} + +xfs_buf_t * +xfs_buf_get_empty( + size_t len, + xfs_buftarg_t *target) +{ + xfs_buf_t *bp; + + bp = xfs_buf_allocate(0); + if (bp) + _xfs_buf_initialize(bp, target, 0, len, 0); + return bp; +} + +/* + * Return a buffer allocated as an empty buffer and associated to external + * memory via xfs_buf_associate_memory() back to it's empty state. + */ +void +xfs_buf_set_empty( + struct xfs_buf *bp, + size_t len) +{ + if (bp->b_pages) + _xfs_buf_free_pages(bp); + + bp->b_pages = NULL; + bp->b_page_count = 0; + bp->b_addr = NULL; + bp->b_file_offset = 0; + bp->b_buffer_length = bp->b_count_desired = len; + bp->b_bn = XFS_BUF_DADDR_NULL; + bp->b_flags &= ~XBF_MAPPED; +} + +static inline struct page * +mem_to_page( + void *addr) +{ + if ((!is_vmalloc_addr(addr))) { + return virt_to_page(addr); + } else { + return vmalloc_to_page(addr); + } +} + +int +xfs_buf_associate_memory( + xfs_buf_t *bp, + void *mem, + size_t len) +{ + int rval; + int i = 0; + unsigned long pageaddr; + unsigned long offset; + size_t buflen; + int page_count; + + pageaddr = (unsigned long)mem & PAGE_MASK; + offset = (unsigned long)mem - pageaddr; + buflen = PAGE_ALIGN(len + offset); + page_count = buflen >> PAGE_SHIFT; + + /* Free any previous set of page pointers */ + if (bp->b_pages) + _xfs_buf_free_pages(bp); + + bp->b_pages = NULL; + bp->b_addr = mem; + + rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK); + if (rval) + return rval; + + bp->b_offset = offset; + + for (i = 0; i < bp->b_page_count; i++) { + bp->b_pages[i] = mem_to_page((void *)pageaddr); + pageaddr += PAGE_SIZE; + } + + bp->b_count_desired = len; + bp->b_buffer_length = buflen; + bp->b_flags |= XBF_MAPPED; + + return 0; +} + +xfs_buf_t * +xfs_buf_get_uncached( + struct xfs_buftarg *target, + size_t len, + int flags) +{ + unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; + int error, i; + xfs_buf_t *bp; + + bp = xfs_buf_allocate(0); + if (unlikely(bp == NULL)) + goto fail; + _xfs_buf_initialize(bp, target, 0, len, 0); + + error = _xfs_buf_get_pages(bp, page_count, 0); + if (error) + goto fail_free_buf; + + for (i = 0; i < page_count; i++) { + bp->b_pages[i] = alloc_page(xb_to_gfp(flags)); + if (!bp->b_pages[i]) + goto fail_free_mem; + } + bp->b_flags |= _XBF_PAGES; + + error = _xfs_buf_map_pages(bp, XBF_MAPPED); + if (unlikely(error)) { + xfs_warn(target->bt_mount, + "%s: failed to map pages\n", __func__); + goto fail_free_mem; + } + + trace_xfs_buf_get_uncached(bp, _RET_IP_); + return bp; + + fail_free_mem: + while (--i >= 0) + __free_page(bp->b_pages[i]); + _xfs_buf_free_pages(bp); + fail_free_buf: + xfs_buf_deallocate(bp); + fail: + return NULL; +} + +/* + * Increment reference count on buffer, to hold the buffer concurrently + * with another thread which may release (free) the buffer asynchronously. + * Must hold the buffer already to call this function. + */ +void +xfs_buf_hold( + xfs_buf_t *bp) +{ + trace_xfs_buf_hold(bp, _RET_IP_); + atomic_inc(&bp->b_hold); +} + +/* + * Releases a hold on the specified buffer. If the + * the hold count is 1, calls xfs_buf_free. + */ +void +xfs_buf_rele( + xfs_buf_t *bp) +{ + struct xfs_perag *pag = bp->b_pag; + + trace_xfs_buf_rele(bp, _RET_IP_); + + if (!pag) { + ASSERT(list_empty(&bp->b_lru)); + ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); + if (atomic_dec_and_test(&bp->b_hold)) + xfs_buf_free(bp); + return; + } + + ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); + + ASSERT(atomic_read(&bp->b_hold) > 0); + if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { + if (!(bp->b_flags & XBF_STALE) && + atomic_read(&bp->b_lru_ref)) { + xfs_buf_lru_add(bp); + spin_unlock(&pag->pag_buf_lock); + } else { + xfs_buf_lru_del(bp); + ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); + rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); + spin_unlock(&pag->pag_buf_lock); + xfs_perag_put(pag); + xfs_buf_free(bp); + } + } +} + + +/* + * Lock a buffer object, if it is not already locked. + * + * If we come across a stale, pinned, locked buffer, we know that we are + * being asked to lock a buffer that has been reallocated. Because it is + * pinned, we know that the log has not been pushed to disk and hence it + * will still be locked. Rather than continuing to have trylock attempts + * fail until someone else pushes the log, push it ourselves before + * returning. This means that the xfsaild will not get stuck trying + * to push on stale inode buffers. + */ +int +xfs_buf_trylock( + struct xfs_buf *bp) +{ + int locked; + + locked = down_trylock(&bp->b_sema) == 0; + if (locked) + XB_SET_OWNER(bp); + else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) + xfs_log_force(bp->b_target->bt_mount, 0); + + trace_xfs_buf_trylock(bp, _RET_IP_); + return locked; +} + +/* + * Lock a buffer object. + * + * If we come across a stale, pinned, locked buffer, we know that we + * are being asked to lock a buffer that has been reallocated. Because + * it is pinned, we know that the log has not been pushed to disk and + * hence it will still be locked. Rather than sleeping until someone + * else pushes the log, push it ourselves before trying to get the lock. + */ +void +xfs_buf_lock( + struct xfs_buf *bp) +{ + trace_xfs_buf_lock(bp, _RET_IP_); + + if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) + xfs_log_force(bp->b_target->bt_mount, 0); + down(&bp->b_sema); + XB_SET_OWNER(bp); + + trace_xfs_buf_lock_done(bp, _RET_IP_); +} + +/* + * Releases the lock on the buffer object. + * If the buffer is marked delwri but is not queued, do so before we + * unlock the buffer as we need to set flags correctly. We also need to + * take a reference for the delwri queue because the unlocker is going to + * drop their's and they don't know we just queued it. + */ +void +xfs_buf_unlock( + struct xfs_buf *bp) +{ + if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) { + atomic_inc(&bp->b_hold); + bp->b_flags |= XBF_ASYNC; + xfs_buf_delwri_queue(bp, 0); + } + + XB_CLEAR_OWNER(bp); + up(&bp->b_sema); + + trace_xfs_buf_unlock(bp, _RET_IP_); +} + +STATIC void +xfs_buf_wait_unpin( + xfs_buf_t *bp) +{ + DECLARE_WAITQUEUE (wait, current); + + if (atomic_read(&bp->b_pin_count) == 0) + return; + + add_wait_queue(&bp->b_waiters, &wait); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (atomic_read(&bp->b_pin_count) == 0) + break; + io_schedule(); + } + remove_wait_queue(&bp->b_waiters, &wait); + set_current_state(TASK_RUNNING); +} + +/* + * Buffer Utility Routines + */ + +STATIC void +xfs_buf_iodone_work( + struct work_struct *work) +{ + xfs_buf_t *bp = + container_of(work, xfs_buf_t, b_iodone_work); + + if (bp->b_iodone) + (*(bp->b_iodone))(bp); + else if (bp->b_flags & XBF_ASYNC) + xfs_buf_relse(bp); +} + +void +xfs_buf_ioend( + xfs_buf_t *bp, + int schedule) +{ + trace_xfs_buf_iodone(bp, _RET_IP_); + + bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); + if (bp->b_error == 0) + bp->b_flags |= XBF_DONE; + + if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) { + if (schedule) { + INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work); + queue_work(xfslogd_workqueue, &bp->b_iodone_work); + } else { + xfs_buf_iodone_work(&bp->b_iodone_work); + } + } else { + complete(&bp->b_iowait); + } +} + +void +xfs_buf_ioerror( + xfs_buf_t *bp, + int error) +{ + ASSERT(error >= 0 && error <= 0xffff); + bp->b_error = (unsigned short)error; + trace_xfs_buf_ioerror(bp, error, _RET_IP_); +} + +int +xfs_bwrite( + struct xfs_mount *mp, + struct xfs_buf *bp) +{ + int error; + + bp->b_flags |= XBF_WRITE; + bp->b_flags &= ~(XBF_ASYNC | XBF_READ); + + xfs_buf_delwri_dequeue(bp); + xfs_bdstrat_cb(bp); + + error = xfs_buf_iowait(bp); + if (error) + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); + xfs_buf_relse(bp); + return error; +} + +void +xfs_bdwrite( + void *mp, + struct xfs_buf *bp) +{ + trace_xfs_buf_bdwrite(bp, _RET_IP_); + + bp->b_flags &= ~XBF_READ; + bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); + + xfs_buf_delwri_queue(bp, 1); +} + +/* + * Called when we want to stop a buffer from getting written or read. + * We attach the EIO error, muck with its flags, and call xfs_buf_ioend + * so that the proper iodone callbacks get called. + */ +STATIC int +xfs_bioerror( + xfs_buf_t *bp) +{ +#ifdef XFSERRORDEBUG + ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone); +#endif + + /* + * No need to wait until the buffer is unpinned, we aren't flushing it. + */ + xfs_buf_ioerror(bp, EIO); + + /* + * We're calling xfs_buf_ioend, so delete XBF_DONE flag. + */ + XFS_BUF_UNREAD(bp); + XFS_BUF_UNDELAYWRITE(bp); + XFS_BUF_UNDONE(bp); + XFS_BUF_STALE(bp); + + xfs_buf_ioend(bp, 0); + + return EIO; +} + +/* + * Same as xfs_bioerror, except that we are releasing the buffer + * here ourselves, and avoiding the xfs_buf_ioend call. + * This is meant for userdata errors; metadata bufs come with + * iodone functions attached, so that we can track down errors. + */ +STATIC int +xfs_bioerror_relse( + struct xfs_buf *bp) +{ + int64_t fl = bp->b_flags; + /* + * No need to wait until the buffer is unpinned. + * We aren't flushing it. + * + * chunkhold expects B_DONE to be set, whether + * we actually finish the I/O or not. We don't want to + * change that interface. + */ + XFS_BUF_UNREAD(bp); + XFS_BUF_UNDELAYWRITE(bp); + XFS_BUF_DONE(bp); + XFS_BUF_STALE(bp); + bp->b_iodone = NULL; + if (!(fl & XBF_ASYNC)) { + /* + * Mark b_error and B_ERROR _both_. + * Lot's of chunkcache code assumes that. + * There's no reason to mark error for + * ASYNC buffers. + */ + xfs_buf_ioerror(bp, EIO); + XFS_BUF_FINISH_IOWAIT(bp); + } else { + xfs_buf_relse(bp); + } + + return EIO; +} + + +/* + * All xfs metadata buffers except log state machine buffers + * get this attached as their b_bdstrat callback function. + * This is so that we can catch a buffer + * after prematurely unpinning it to forcibly shutdown the filesystem. + */ +int +xfs_bdstrat_cb( + struct xfs_buf *bp) +{ + if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { + trace_xfs_bdstrat_shut(bp, _RET_IP_); + /* + * Metadata write that didn't get logged but + * written delayed anyway. These aren't associated + * with a transaction, and can be ignored. + */ + if (!bp->b_iodone && !XFS_BUF_ISREAD(bp)) + return xfs_bioerror_relse(bp); + else + return xfs_bioerror(bp); + } + + xfs_buf_iorequest(bp); + return 0; +} + +/* + * Wrapper around bdstrat so that we can stop data from going to disk in case + * we are shutting down the filesystem. Typically user data goes thru this + * path; one of the exceptions is the superblock. + */ +void +xfsbdstrat( + struct xfs_mount *mp, + struct xfs_buf *bp) +{ + if (XFS_FORCED_SHUTDOWN(mp)) { + trace_xfs_bdstrat_shut(bp, _RET_IP_); + xfs_bioerror_relse(bp); + return; + } + + xfs_buf_iorequest(bp); +} + +STATIC void +_xfs_buf_ioend( + xfs_buf_t *bp, + int schedule) +{ + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) + xfs_buf_ioend(bp, schedule); +} + +STATIC void +xfs_buf_bio_end_io( + struct bio *bio, + int error) +{ + xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; + + xfs_buf_ioerror(bp, -error); + + if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) + invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); + + _xfs_buf_ioend(bp, 1); + bio_put(bio); +} + +STATIC void +_xfs_buf_ioapply( + xfs_buf_t *bp) +{ + int rw, map_i, total_nr_pages, nr_pages; + struct bio *bio; + int offset = bp->b_offset; + int size = bp->b_count_desired; + sector_t sector = bp->b_bn; + + total_nr_pages = bp->b_page_count; + map_i = 0; + + if (bp->b_flags & XBF_WRITE) { + if (bp->b_flags & XBF_SYNCIO) + rw = WRITE_SYNC; + else + rw = WRITE; + if (bp->b_flags & XBF_FUA) + rw |= REQ_FUA; + if (bp->b_flags & XBF_FLUSH) + rw |= REQ_FLUSH; + } else if (bp->b_flags & XBF_READ_AHEAD) { + rw = READA; + } else { + rw = READ; + } + + /* we only use the buffer cache for meta-data */ + rw |= REQ_META; + +next_chunk: + atomic_inc(&bp->b_io_remaining); + nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); + if (nr_pages > total_nr_pages) + nr_pages = total_nr_pages; + + bio = bio_alloc(GFP_NOIO, nr_pages); + bio->bi_bdev = bp->b_target->bt_bdev; + bio->bi_sector = sector; + bio->bi_end_io = xfs_buf_bio_end_io; + bio->bi_private = bp; + + + for (; size && nr_pages; nr_pages--, map_i++) { + int rbytes, nbytes = PAGE_SIZE - offset; + + if (nbytes > size) + nbytes = size; + + rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset); + if (rbytes < nbytes) + break; + + offset = 0; + sector += nbytes >> BBSHIFT; + size -= nbytes; + total_nr_pages--; + } + + if (likely(bio->bi_size)) { + if (xfs_buf_is_vmapped(bp)) { + flush_kernel_vmap_range(bp->b_addr, + xfs_buf_vmap_len(bp)); + } + submit_bio(rw, bio); + if (size) + goto next_chunk; + } else { + xfs_buf_ioerror(bp, EIO); + bio_put(bio); + } +} + +int +xfs_buf_iorequest( + xfs_buf_t *bp) +{ + trace_xfs_buf_iorequest(bp, _RET_IP_); + + if (bp->b_flags & XBF_DELWRI) { + xfs_buf_delwri_queue(bp, 1); + return 0; + } + + if (bp->b_flags & XBF_WRITE) { + xfs_buf_wait_unpin(bp); + } + + xfs_buf_hold(bp); + + /* Set the count to 1 initially, this will stop an I/O + * completion callout which happens before we have started + * all the I/O from calling xfs_buf_ioend too early. + */ + atomic_set(&bp->b_io_remaining, 1); + _xfs_buf_ioapply(bp); + _xfs_buf_ioend(bp, 0); + + xfs_buf_rele(bp); + return 0; +} + +/* + * Waits for I/O to complete on the buffer supplied. + * It returns immediately if no I/O is pending. + * It returns the I/O error code, if any, or 0 if there was no error. + */ +int +xfs_buf_iowait( + xfs_buf_t *bp) +{ + trace_xfs_buf_iowait(bp, _RET_IP_); + + wait_for_completion(&bp->b_iowait); + + trace_xfs_buf_iowait_done(bp, _RET_IP_); + return bp->b_error; +} + +xfs_caddr_t +xfs_buf_offset( + xfs_buf_t *bp, + size_t offset) +{ + struct page *page; + + if (bp->b_flags & XBF_MAPPED) + return bp->b_addr + offset; + + offset += bp->b_offset; + page = bp->b_pages[offset >> PAGE_SHIFT]; + return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1)); +} + +/* + * Move data into or out of a buffer. + */ +void +xfs_buf_iomove( + xfs_buf_t *bp, /* buffer to process */ + size_t boff, /* starting buffer offset */ + size_t bsize, /* length to copy */ + void *data, /* data address */ + xfs_buf_rw_t mode) /* read/write/zero flag */ +{ + size_t bend, cpoff, csize; + struct page *page; + + bend = boff + bsize; + while (boff < bend) { + page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; + cpoff = xfs_buf_poff(boff + bp->b_offset); + csize = min_t(size_t, + PAGE_SIZE-cpoff, bp->b_count_desired-boff); + + ASSERT(((csize + cpoff) <= PAGE_SIZE)); + + switch (mode) { + case XBRW_ZERO: + memset(page_address(page) + cpoff, 0, csize); + break; + case XBRW_READ: + memcpy(data, page_address(page) + cpoff, csize); + break; + case XBRW_WRITE: + memcpy(page_address(page) + cpoff, data, csize); + } + + boff += csize; + data += csize; + } +} + +/* + * Handling of buffer targets (buftargs). + */ + +/* + * Wait for any bufs with callbacks that have been submitted but have not yet + * returned. These buffers will have an elevated hold count, so wait on those + * while freeing all the buffers only held by the LRU. + */ +void +xfs_wait_buftarg( + struct xfs_buftarg *btp) +{ + struct xfs_buf *bp; + +restart: + spin_lock(&btp->bt_lru_lock); + while (!list_empty(&btp->bt_lru)) { + bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); + if (atomic_read(&bp->b_hold) > 1) { + spin_unlock(&btp->bt_lru_lock); + delay(100); + goto restart; + } + /* + * clear the LRU reference count so the bufer doesn't get + * ignored in xfs_buf_rele(). + */ + atomic_set(&bp->b_lru_ref, 0); + spin_unlock(&btp->bt_lru_lock); + xfs_buf_rele(bp); + spin_lock(&btp->bt_lru_lock); + } + spin_unlock(&btp->bt_lru_lock); +} + +int +xfs_buftarg_shrink( + struct shrinker *shrink, + struct shrink_control *sc) +{ + struct xfs_buftarg *btp = container_of(shrink, + struct xfs_buftarg, bt_shrinker); + struct xfs_buf *bp; + int nr_to_scan = sc->nr_to_scan; + LIST_HEAD(dispose); + + if (!nr_to_scan) + return btp->bt_lru_nr; + + spin_lock(&btp->bt_lru_lock); + while (!list_empty(&btp->bt_lru)) { + if (nr_to_scan-- <= 0) + break; + + bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); + + /* + * Decrement the b_lru_ref count unless the value is already + * zero. If the value is already zero, we need to reclaim the + * buffer, otherwise it gets another trip through the LRU. + */ + if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { + list_move_tail(&bp->b_lru, &btp->bt_lru); + continue; + } + + /* + * remove the buffer from the LRU now to avoid needing another + * lock round trip inside xfs_buf_rele(). + */ + list_move(&bp->b_lru, &dispose); + btp->bt_lru_nr--; + } + spin_unlock(&btp->bt_lru_lock); + + while (!list_empty(&dispose)) { + bp = list_first_entry(&dispose, struct xfs_buf, b_lru); + list_del_init(&bp->b_lru); + xfs_buf_rele(bp); + } + + return btp->bt_lru_nr; +} + +void +xfs_free_buftarg( + struct xfs_mount *mp, + struct xfs_buftarg *btp) +{ + unregister_shrinker(&btp->bt_shrinker); + + xfs_flush_buftarg(btp, 1); + if (mp->m_flags & XFS_MOUNT_BARRIER) + xfs_blkdev_issue_flush(btp); + + kthread_stop(btp->bt_task); + kmem_free(btp); +} + +STATIC int +xfs_setsize_buftarg_flags( + xfs_buftarg_t *btp, + unsigned int blocksize, + unsigned int sectorsize, + int verbose) +{ + btp->bt_bsize = blocksize; + btp->bt_sshift = ffs(sectorsize) - 1; + btp->bt_smask = sectorsize - 1; + + if (set_blocksize(btp->bt_bdev, sectorsize)) { + xfs_warn(btp->bt_mount, + "Cannot set_blocksize to %u on device %s\n", + sectorsize, xfs_buf_target_name(btp)); + return EINVAL; + } + + return 0; +} + +/* + * When allocating the initial buffer target we have not yet + * read in the superblock, so don't know what sized sectors + * are being used is at this early stage. Play safe. + */ +STATIC int +xfs_setsize_buftarg_early( + xfs_buftarg_t *btp, + struct block_device *bdev) +{ + return xfs_setsize_buftarg_flags(btp, + PAGE_SIZE, bdev_logical_block_size(bdev), 0); +} + +int +xfs_setsize_buftarg( + xfs_buftarg_t *btp, + unsigned int blocksize, + unsigned int sectorsize) +{ + return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1); +} + +STATIC int +xfs_alloc_delwrite_queue( + xfs_buftarg_t *btp, + const char *fsname) +{ + INIT_LIST_HEAD(&btp->bt_delwrite_queue); + spin_lock_init(&btp->bt_delwrite_lock); + btp->bt_flags = 0; + btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); + if (IS_ERR(btp->bt_task)) + return PTR_ERR(btp->bt_task); + return 0; +} + +xfs_buftarg_t * +xfs_alloc_buftarg( + struct xfs_mount *mp, + struct block_device *bdev, + int external, + const char *fsname) +{ + xfs_buftarg_t *btp; + + btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); + + btp->bt_mount = mp; + btp->bt_dev = bdev->bd_dev; + btp->bt_bdev = bdev; + btp->bt_bdi = blk_get_backing_dev_info(bdev); + if (!btp->bt_bdi) + goto error; + + INIT_LIST_HEAD(&btp->bt_lru); + spin_lock_init(&btp->bt_lru_lock); + if (xfs_setsize_buftarg_early(btp, bdev)) + goto error; + if (xfs_alloc_delwrite_queue(btp, fsname)) + goto error; + btp->bt_shrinker.shrink = xfs_buftarg_shrink; + btp->bt_shrinker.seeks = DEFAULT_SEEKS; + register_shrinker(&btp->bt_shrinker); + return btp; + +error: + kmem_free(btp); + return NULL; +} + + +/* + * Delayed write buffer handling + */ +STATIC void +xfs_buf_delwri_queue( + xfs_buf_t *bp, + int unlock) +{ + struct list_head *dwq = &bp->b_target->bt_delwrite_queue; + spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; + + trace_xfs_buf_delwri_queue(bp, _RET_IP_); + + ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC)); + + spin_lock(dwlk); + /* If already in the queue, dequeue and place at tail */ + if (!list_empty(&bp->b_list)) { + ASSERT(bp->b_flags & _XBF_DELWRI_Q); + if (unlock) + atomic_dec(&bp->b_hold); + list_del(&bp->b_list); + } + + if (list_empty(dwq)) { + /* start xfsbufd as it is about to have something to do */ + wake_up_process(bp->b_target->bt_task); + } + + bp->b_flags |= _XBF_DELWRI_Q; + list_add_tail(&bp->b_list, dwq); + bp->b_queuetime = jiffies; + spin_unlock(dwlk); + + if (unlock) + xfs_buf_unlock(bp); +} + +void +xfs_buf_delwri_dequeue( + xfs_buf_t *bp) +{ + spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; + int dequeued = 0; + + spin_lock(dwlk); + if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) { + ASSERT(bp->b_flags & _XBF_DELWRI_Q); + list_del_init(&bp->b_list); + dequeued = 1; + } + bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); + spin_unlock(dwlk); + + if (dequeued) + xfs_buf_rele(bp); + + trace_xfs_buf_delwri_dequeue(bp, _RET_IP_); +} + +/* + * If a delwri buffer needs to be pushed before it has aged out, then promote + * it to the head of the delwri queue so that it will be flushed on the next + * xfsbufd run. We do this by resetting the queuetime of the buffer to be older + * than the age currently needed to flush the buffer. Hence the next time the + * xfsbufd sees it is guaranteed to be considered old enough to flush. + */ +void +xfs_buf_delwri_promote( + struct xfs_buf *bp) +{ + struct xfs_buftarg *btp = bp->b_target; + long age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1; + + ASSERT(bp->b_flags & XBF_DELWRI); + ASSERT(bp->b_flags & _XBF_DELWRI_Q); + + /* + * Check the buffer age before locking the delayed write queue as we + * don't need to promote buffers that are already past the flush age. + */ + if (bp->b_queuetime < jiffies - age) + return; + bp->b_queuetime = jiffies - age; + spin_lock(&btp->bt_delwrite_lock); + list_move(&bp->b_list, &btp->bt_delwrite_queue); + spin_unlock(&btp->bt_delwrite_lock); +} + +STATIC void +xfs_buf_runall_queues( + struct workqueue_struct *queue) +{ + flush_workqueue(queue); +} + +/* + * Move as many buffers as specified to the supplied list + * idicating if we skipped any buffers to prevent deadlocks. + */ +STATIC int +xfs_buf_delwri_split( + xfs_buftarg_t *target, + struct list_head *list, + unsigned long age) +{ + xfs_buf_t *bp, *n; + struct list_head *dwq = &target->bt_delwrite_queue; + spinlock_t *dwlk = &target->bt_delwrite_lock; + int skipped = 0; + int force; + + force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags); + INIT_LIST_HEAD(list); + spin_lock(dwlk); + list_for_each_entry_safe(bp, n, dwq, b_list) { + ASSERT(bp->b_flags & XBF_DELWRI); + + if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) { + if (!force && + time_before(jiffies, bp->b_queuetime + age)) { + xfs_buf_unlock(bp); + break; + } + + bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q); + bp->b_flags |= XBF_WRITE; + list_move_tail(&bp->b_list, list); + trace_xfs_buf_delwri_split(bp, _RET_IP_); + } else + skipped++; + } + spin_unlock(dwlk); + + return skipped; + +} + +/* + * Compare function is more complex than it needs to be because + * the return value is only 32 bits and we are doing comparisons + * on 64 bit values + */ +static int +xfs_buf_cmp( + void *priv, + struct list_head *a, + struct list_head *b) +{ + struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list); + struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list); + xfs_daddr_t diff; + + diff = ap->b_bn - bp->b_bn; + if (diff < 0) + return -1; + if (diff > 0) + return 1; + return 0; +} + +STATIC int +xfsbufd( + void *data) +{ + xfs_buftarg_t *target = (xfs_buftarg_t *)data; + + current->flags |= PF_MEMALLOC; + + set_freezable(); + + do { + long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); + long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); + struct list_head tmp; + struct blk_plug plug; + + if (unlikely(freezing(current))) { + set_bit(XBT_FORCE_SLEEP, &target->bt_flags); + refrigerator(); + } else { + clear_bit(XBT_FORCE_SLEEP, &target->bt_flags); + } + + /* sleep for a long time if there is nothing to do. */ + if (list_empty(&target->bt_delwrite_queue)) + tout = MAX_SCHEDULE_TIMEOUT; + schedule_timeout_interruptible(tout); + + xfs_buf_delwri_split(target, &tmp, age); + list_sort(NULL, &tmp, xfs_buf_cmp); + + blk_start_plug(&plug); + while (!list_empty(&tmp)) { + struct xfs_buf *bp; + bp = list_first_entry(&tmp, struct xfs_buf, b_list); + list_del_init(&bp->b_list); + xfs_bdstrat_cb(bp); + } + blk_finish_plug(&plug); + } while (!kthread_should_stop()); + + return 0; +} + +/* + * Go through all incore buffers, and release buffers if they belong to + * the given device. This is used in filesystem error handling to + * preserve the consistency of its metadata. + */ +int +xfs_flush_buftarg( + xfs_buftarg_t *target, + int wait) +{ + xfs_buf_t *bp; + int pincount = 0; + LIST_HEAD(tmp_list); + LIST_HEAD(wait_list); + struct blk_plug plug; + + xfs_buf_runall_queues(xfsconvertd_workqueue); + xfs_buf_runall_queues(xfsdatad_workqueue); + xfs_buf_runall_queues(xfslogd_workqueue); + + set_bit(XBT_FORCE_FLUSH, &target->bt_flags); + pincount = xfs_buf_delwri_split(target, &tmp_list, 0); + + /* + * Dropped the delayed write list lock, now walk the temporary list. + * All I/O is issued async and then if we need to wait for completion + * we do that after issuing all the IO. + */ + list_sort(NULL, &tmp_list, xfs_buf_cmp); + + blk_start_plug(&plug); + while (!list_empty(&tmp_list)) { + bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); + ASSERT(target == bp->b_target); + list_del_init(&bp->b_list); + if (wait) { + bp->b_flags &= ~XBF_ASYNC; + list_add(&bp->b_list, &wait_list); + } + xfs_bdstrat_cb(bp); + } + blk_finish_plug(&plug); + + if (wait) { + /* Wait for IO to complete. */ + while (!list_empty(&wait_list)) { + bp = list_first_entry(&wait_list, struct xfs_buf, b_list); + + list_del_init(&bp->b_list); + xfs_buf_iowait(bp); + xfs_buf_relse(bp); + } + } + + return pincount; +} + +int __init +xfs_buf_init(void) +{ + xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", + KM_ZONE_HWALIGN, NULL); + if (!xfs_buf_zone) + goto out; + + xfslogd_workqueue = alloc_workqueue("xfslogd", + WQ_MEM_RECLAIM | WQ_HIGHPRI, 1); + if (!xfslogd_workqueue) + goto out_free_buf_zone; + + xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1); + if (!xfsdatad_workqueue) + goto out_destroy_xfslogd_workqueue; + + xfsconvertd_workqueue = alloc_workqueue("xfsconvertd", + WQ_MEM_RECLAIM, 1); + if (!xfsconvertd_workqueue) + goto out_destroy_xfsdatad_workqueue; + + return 0; + + out_destroy_xfsdatad_workqueue: + destroy_workqueue(xfsdatad_workqueue); + out_destroy_xfslogd_workqueue: + destroy_workqueue(xfslogd_workqueue); + out_free_buf_zone: + kmem_zone_destroy(xfs_buf_zone); + out: + return -ENOMEM; +} + +void +xfs_buf_terminate(void) +{ + destroy_workqueue(xfsconvertd_workqueue); + destroy_workqueue(xfsdatad_workqueue); + destroy_workqueue(xfslogd_workqueue); + kmem_zone_destroy(xfs_buf_zone); +} + +#ifdef CONFIG_KDB_MODULES +struct list_head * +xfs_get_buftarg_list(void) +{ + return &xfs_buftarg_list; +} +#endif diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h new file mode 100644 index 0000000..620972b --- /dev/null +++ b/fs/xfs/xfs_buf.h @@ -0,0 +1,326 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_BUF_H__ +#define __XFS_BUF_H__ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Base types + */ + +#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) + +#define xfs_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE) +#define xfs_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) +#define xfs_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT) +#define xfs_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK) + +typedef enum { + XBRW_READ = 1, /* transfer into target memory */ + XBRW_WRITE = 2, /* transfer from target memory */ + XBRW_ZERO = 3, /* Zero target memory */ +} xfs_buf_rw_t; + +#define XBF_READ (1 << 0) /* buffer intended for reading from device */ +#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ +#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ +#define XBF_MAPPED (1 << 3) /* buffer mapped (b_addr valid) */ +#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ +#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ +#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ +#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ + +/* I/O hints for the BIO layer */ +#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ +#define XBF_FUA (1 << 11)/* force cache write through mode */ +#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ + +/* flags used only as arguments to access routines */ +#define XBF_LOCK (1 << 15)/* lock requested */ +#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ +#define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */ + +/* flags used only internally */ +#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ +#define _XBF_KMEM (1 << 21)/* backed by heap memory */ +#define _XBF_DELWRI_Q (1 << 22)/* buffer on delwri queue */ + +typedef unsigned int xfs_buf_flags_t; + +#define XFS_BUF_FLAGS \ + { XBF_READ, "READ" }, \ + { XBF_WRITE, "WRITE" }, \ + { XBF_READ_AHEAD, "READ_AHEAD" }, \ + { XBF_MAPPED, "MAPPED" }, \ + { XBF_ASYNC, "ASYNC" }, \ + { XBF_DONE, "DONE" }, \ + { XBF_DELWRI, "DELWRI" }, \ + { XBF_STALE, "STALE" }, \ + { XBF_SYNCIO, "SYNCIO" }, \ + { XBF_FUA, "FUA" }, \ + { XBF_FLUSH, "FLUSH" }, \ + { XBF_LOCK, "LOCK" }, /* should never be set */\ + { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ + { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ + { _XBF_PAGES, "PAGES" }, \ + { _XBF_KMEM, "KMEM" }, \ + { _XBF_DELWRI_Q, "DELWRI_Q" } + +typedef enum { + XBT_FORCE_SLEEP = 0, + XBT_FORCE_FLUSH = 1, +} xfs_buftarg_flags_t; + +typedef struct xfs_buftarg { + dev_t bt_dev; + struct block_device *bt_bdev; + struct backing_dev_info *bt_bdi; + struct xfs_mount *bt_mount; + unsigned int bt_bsize; + unsigned int bt_sshift; + size_t bt_smask; + + /* per device delwri queue */ + struct task_struct *bt_task; + struct list_head bt_delwrite_queue; + spinlock_t bt_delwrite_lock; + unsigned long bt_flags; + + /* LRU control structures */ + struct shrinker bt_shrinker; + struct list_head bt_lru; + spinlock_t bt_lru_lock; + unsigned int bt_lru_nr; +} xfs_buftarg_t; + +struct xfs_buf; +typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); + +#define XB_PAGES 2 + +typedef struct xfs_buf { + /* + * first cacheline holds all the fields needed for an uncontended cache + * hit to be fully processed. The semaphore straddles the cacheline + * boundary, but the counter and lock sits on the first cacheline, + * which is the only bit that is touched if we hit the semaphore + * fast-path on locking. + */ + struct rb_node b_rbnode; /* rbtree node */ + xfs_off_t b_file_offset; /* offset in file */ + size_t b_buffer_length;/* size of buffer in bytes */ + atomic_t b_hold; /* reference count */ + atomic_t b_lru_ref; /* lru reclaim ref count */ + xfs_buf_flags_t b_flags; /* status flags */ + struct semaphore b_sema; /* semaphore for lockables */ + + struct list_head b_lru; /* lru list */ + wait_queue_head_t b_waiters; /* unpin waiters */ + struct list_head b_list; + struct xfs_perag *b_pag; /* contains rbtree root */ + xfs_buftarg_t *b_target; /* buffer target (device) */ + xfs_daddr_t b_bn; /* block number for I/O */ + size_t b_count_desired;/* desired transfer size */ + void *b_addr; /* virtual address of buffer */ + struct work_struct b_iodone_work; + xfs_buf_iodone_t b_iodone; /* I/O completion function */ + struct completion b_iowait; /* queue for I/O waiters */ + void *b_fspriv; + struct xfs_trans *b_transp; + struct page **b_pages; /* array of page pointers */ + struct page *b_page_array[XB_PAGES]; /* inline pages */ + unsigned long b_queuetime; /* time buffer was queued */ + atomic_t b_pin_count; /* pin count */ + atomic_t b_io_remaining; /* #outstanding I/O requests */ + unsigned int b_page_count; /* size of page array */ + unsigned int b_offset; /* page offset in first page */ + unsigned short b_error; /* error code on I/O */ +#ifdef XFS_BUF_LOCK_TRACKING + int b_last_holder; +#endif +} xfs_buf_t; + + +/* Finding and Reading Buffers */ +extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t, + xfs_buf_flags_t, xfs_buf_t *); +#define xfs_incore(buftarg,blkno,len,lockit) \ + _xfs_buf_find(buftarg, blkno ,len, lockit, NULL) + +extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t, + xfs_buf_flags_t); +extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t, + xfs_buf_flags_t); + +extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); +extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len); +extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); +extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); +extern void xfs_buf_hold(xfs_buf_t *); +extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t); +struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp, + struct xfs_buftarg *target, + xfs_daddr_t daddr, size_t length, int flags); + +/* Releasing Buffers */ +extern void xfs_buf_free(xfs_buf_t *); +extern void xfs_buf_rele(xfs_buf_t *); + +/* Locking and Unlocking Buffers */ +extern int xfs_buf_trylock(xfs_buf_t *); +extern void xfs_buf_lock(xfs_buf_t *); +extern void xfs_buf_unlock(xfs_buf_t *); +#define xfs_buf_islocked(bp) \ + ((bp)->b_sema.count <= 0) + +/* Buffer Read and Write Routines */ +extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); +extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); + +extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); +extern int xfs_bdstrat_cb(struct xfs_buf *); + +extern void xfs_buf_ioend(xfs_buf_t *, int); +extern void xfs_buf_ioerror(xfs_buf_t *, int); +extern int xfs_buf_iorequest(xfs_buf_t *); +extern int xfs_buf_iowait(xfs_buf_t *); +extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, + xfs_buf_rw_t); +#define xfs_buf_zero(bp, off, len) \ + xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) + +static inline int xfs_buf_geterror(xfs_buf_t *bp) +{ + return bp ? bp->b_error : ENOMEM; +} + +/* Buffer Utility Routines */ +extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); + +/* Delayed Write Buffer Routines */ +extern void xfs_buf_delwri_dequeue(xfs_buf_t *); +extern void xfs_buf_delwri_promote(xfs_buf_t *); + +/* Buffer Daemon Setup Routines */ +extern int xfs_buf_init(void); +extern void xfs_buf_terminate(void); + +static inline const char * +xfs_buf_target_name(struct xfs_buftarg *target) +{ + static char __b[BDEVNAME_SIZE]; + + return bdevname(target->bt_bdev, __b); +} + + +#define XFS_BUF_ZEROFLAGS(bp) \ + ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \ + XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) + +void xfs_buf_stale(struct xfs_buf *bp); +#define XFS_BUF_STALE(bp) xfs_buf_stale(bp); +#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) +#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) +#define XFS_BUF_SUPER_STALE(bp) do { \ + XFS_BUF_STALE(bp); \ + xfs_buf_delwri_dequeue(bp); \ + XFS_BUF_DONE(bp); \ + } while (0) + +#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) +#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) +#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) + +#define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE) +#define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE) +#define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE) + +#define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC) +#define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC) +#define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC) + +#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) +#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) +#define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ) + +#define XFS_BUF_WRITE(bp) ((bp)->b_flags |= XBF_WRITE) +#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) +#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) + +#define XFS_BUF_ADDR(bp) ((bp)->b_bn) +#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno)) +#define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset) +#define XFS_BUF_SET_OFFSET(bp, off) ((bp)->b_file_offset = (off)) +#define XFS_BUF_COUNT(bp) ((bp)->b_count_desired) +#define XFS_BUF_SET_COUNT(bp, cnt) ((bp)->b_count_desired = (cnt)) +#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) +#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) + +static inline void +xfs_buf_set_ref( + struct xfs_buf *bp, + int lru_ref) +{ + atomic_set(&bp->b_lru_ref, lru_ref); +} +#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref) +#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) + +static inline int xfs_buf_ispinned(struct xfs_buf *bp) +{ + return atomic_read(&bp->b_pin_count); +} + +#define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait); + +static inline void xfs_buf_relse(xfs_buf_t *bp) +{ + xfs_buf_unlock(bp); + xfs_buf_rele(bp); +} + +/* + * Handling of buftargs. + */ +extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *, + struct block_device *, int, const char *); +extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); +extern void xfs_wait_buftarg(xfs_buftarg_t *); +extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); +extern int xfs_flush_buftarg(xfs_buftarg_t *, int); + +#ifdef CONFIG_KDB_MODULES +extern struct list_head *xfs_get_buftarg_list(void); +#endif + +#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) +#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) + +#define xfs_binval(buftarg) xfs_flush_buftarg(buftarg, 1) +#define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1) + +#endif /* __XFS_BUF_H__ */ diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c new file mode 100644 index 0000000..244e797 --- /dev/null +++ b/fs/xfs/xfs_discard.c @@ -0,0 +1,222 @@ +/* + * Copyright (C) 2010 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_sb.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_quota.h" +#include "xfs_trans.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_inode.h" +#include "xfs_alloc.h" +#include "xfs_error.h" +#include "xfs_discard.h" +#include "xfs_trace.h" + +STATIC int +xfs_trim_extents( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_fsblock_t start, + xfs_fsblock_t len, + xfs_fsblock_t minlen, + __uint64_t *blocks_trimmed) +{ + struct block_device *bdev = mp->m_ddev_targp->bt_bdev; + struct xfs_btree_cur *cur; + struct xfs_buf *agbp; + struct xfs_perag *pag; + int error; + int i; + + pag = xfs_perag_get(mp, agno); + + error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); + if (error || !agbp) + goto out_put_perag; + + cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT); + + /* + * Force out the log. This means any transactions that might have freed + * space before we took the AGF buffer lock are now on disk, and the + * volatile disk cache is flushed. + */ + xfs_log_force(mp, XFS_LOG_SYNC); + + /* + * Look up the longest btree in the AGF and start with it. + */ + error = xfs_alloc_lookup_le(cur, 0, + XFS_BUF_TO_AGF(agbp)->agf_longest, &i); + if (error) + goto out_del_cursor; + + /* + * Loop until we are done with all extents that are large + * enough to be worth discarding. + */ + while (i) { + xfs_agblock_t fbno; + xfs_extlen_t flen; + + error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); + if (error) + goto out_del_cursor; + XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); + ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest); + + /* + * Too small? Give up. + */ + if (flen < minlen) { + trace_xfs_discard_toosmall(mp, agno, fbno, flen); + goto out_del_cursor; + } + + /* + * If the extent is entirely outside of the range we are + * supposed to discard skip it. Do not bother to trim + * down partially overlapping ranges for now. + */ + if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start || + XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) { + trace_xfs_discard_exclude(mp, agno, fbno, flen); + goto next_extent; + } + + /* + * If any blocks in the range are still busy, skip the + * discard and try again the next time. + */ + if (xfs_alloc_busy_search(mp, agno, fbno, flen)) { + trace_xfs_discard_busy(mp, agno, fbno, flen); + goto next_extent; + } + + trace_xfs_discard_extent(mp, agno, fbno, flen); + error = -blkdev_issue_discard(bdev, + XFS_AGB_TO_DADDR(mp, agno, fbno), + XFS_FSB_TO_BB(mp, flen), + GFP_NOFS, 0); + if (error) + goto out_del_cursor; + *blocks_trimmed += flen; + +next_extent: + error = xfs_btree_decrement(cur, 0, &i); + if (error) + goto out_del_cursor; + } + +out_del_cursor: + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_buf_relse(agbp); +out_put_perag: + xfs_perag_put(pag); + return error; +} + +int +xfs_ioc_trim( + struct xfs_mount *mp, + struct fstrim_range __user *urange) +{ + struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; + unsigned int granularity = q->limits.discard_granularity; + struct fstrim_range range; + xfs_fsblock_t start, len, minlen; + xfs_agnumber_t start_agno, end_agno, agno; + __uint64_t blocks_trimmed = 0; + int error, last_error = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (!blk_queue_discard(q)) + return -XFS_ERROR(EOPNOTSUPP); + if (copy_from_user(&range, urange, sizeof(range))) + return -XFS_ERROR(EFAULT); + + /* + * Truncating down the len isn't actually quite correct, but using + * XFS_B_TO_FSB would mean we trivially get overflows for values + * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default + * used by the fstrim application. In the end it really doesn't + * matter as trimming blocks is an advisory interface. + */ + start = XFS_B_TO_FSBT(mp, range.start); + len = XFS_B_TO_FSBT(mp, range.len); + minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen)); + + start_agno = XFS_FSB_TO_AGNO(mp, start); + if (start_agno >= mp->m_sb.sb_agcount) + return -XFS_ERROR(EINVAL); + + end_agno = XFS_FSB_TO_AGNO(mp, start + len); + if (end_agno >= mp->m_sb.sb_agcount) + end_agno = mp->m_sb.sb_agcount - 1; + + for (agno = start_agno; agno <= end_agno; agno++) { + error = -xfs_trim_extents(mp, agno, start, len, minlen, + &blocks_trimmed); + if (error) + last_error = error; + } + + if (last_error) + return last_error; + + range.len = XFS_FSB_TO_B(mp, blocks_trimmed); + if (copy_to_user(urange, &range, sizeof(range))) + return -XFS_ERROR(EFAULT); + return 0; +} + +int +xfs_discard_extents( + struct xfs_mount *mp, + struct list_head *list) +{ + struct xfs_busy_extent *busyp; + int error = 0; + + list_for_each_entry(busyp, list, list) { + trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, + busyp->length); + + error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, + XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), + XFS_FSB_TO_BB(mp, busyp->length), + GFP_NOFS, 0); + if (error && error != EOPNOTSUPP) { + xfs_info(mp, + "discard failed for extent [0x%llu,%u], error %d", + (unsigned long long)busyp->bno, + busyp->length, + error); + return error; + } + } + + return 0; +} diff --git a/fs/xfs/xfs_discard.h b/fs/xfs/xfs_discard.h new file mode 100644 index 0000000..344879a --- /dev/null +++ b/fs/xfs/xfs_discard.h @@ -0,0 +1,10 @@ +#ifndef XFS_DISCARD_H +#define XFS_DISCARD_H 1 + +struct fstrim_range; +struct list_head; + +extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); +extern int xfs_discard_extents(struct xfs_mount *, struct list_head *); + +#endif /* XFS_DISCARD_H */ diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c new file mode 100644 index 0000000..db62959 --- /dev/null +++ b/fs/xfs/xfs_dquot.c @@ -0,0 +1,1454 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_trans_space.h" +#include "xfs_trans_priv.h" +#include "xfs_qm.h" +#include "xfs_trace.h" + + +/* + LOCK ORDER + + inode lock (ilock) + dquot hash-chain lock (hashlock) + xqm dquot freelist lock (freelistlock + mount's dquot list lock (mplistlock) + user dquot lock - lock ordering among dquots is based on the uid or gid + group dquot lock - similar to udquots. Between the two dquots, the udquot + has to be locked first. + pin lock - the dquot lock must be held to take this lock. + flush lock - ditto. +*/ + +#ifdef DEBUG +xfs_buftarg_t *xfs_dqerror_target; +int xfs_do_dqerror; +int xfs_dqreq_num; +int xfs_dqerror_mod = 33; +#endif + +static struct lock_class_key xfs_dquot_other_class; + +/* + * Allocate and initialize a dquot. We don't always allocate fresh memory; + * we try to reclaim a free dquot if the number of incore dquots are above + * a threshold. + * The only field inside the core that gets initialized at this point + * is the d_id field. The idea is to fill in the entire q_core + * when we read in the on disk dquot. + */ +STATIC xfs_dquot_t * +xfs_qm_dqinit( + xfs_mount_t *mp, + xfs_dqid_t id, + uint type) +{ + xfs_dquot_t *dqp; + boolean_t brandnewdquot; + + brandnewdquot = xfs_qm_dqalloc_incore(&dqp); + dqp->dq_flags = type; + dqp->q_core.d_id = cpu_to_be32(id); + dqp->q_mount = mp; + + /* + * No need to re-initialize these if this is a reclaimed dquot. + */ + if (brandnewdquot) { + INIT_LIST_HEAD(&dqp->q_freelist); + mutex_init(&dqp->q_qlock); + init_waitqueue_head(&dqp->q_pinwait); + + /* + * Because we want to use a counting completion, complete + * the flush completion once to allow a single access to + * the flush completion without blocking. + */ + init_completion(&dqp->q_flush); + complete(&dqp->q_flush); + + trace_xfs_dqinit(dqp); + } else { + /* + * Only the q_core portion was zeroed in dqreclaim_one(). + * So, we need to reset others. + */ + dqp->q_nrefs = 0; + dqp->q_blkno = 0; + INIT_LIST_HEAD(&dqp->q_mplist); + INIT_LIST_HEAD(&dqp->q_hashlist); + dqp->q_bufoffset = 0; + dqp->q_fileoffset = 0; + dqp->q_transp = NULL; + dqp->q_gdquot = NULL; + dqp->q_res_bcount = 0; + dqp->q_res_icount = 0; + dqp->q_res_rtbcount = 0; + atomic_set(&dqp->q_pincount, 0); + dqp->q_hash = NULL; + ASSERT(list_empty(&dqp->q_freelist)); + + trace_xfs_dqreuse(dqp); + } + + /* + * In either case we need to make sure group quotas have a different + * lock class than user quotas, to make sure lockdep knows we can + * locks of one of each at the same time. + */ + if (!(type & XFS_DQ_USER)) + lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); + + /* + * log item gets initialized later + */ + return (dqp); +} + +/* + * This is called to free all the memory associated with a dquot + */ +void +xfs_qm_dqdestroy( + xfs_dquot_t *dqp) +{ + ASSERT(list_empty(&dqp->q_freelist)); + + mutex_destroy(&dqp->q_qlock); + kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); + + atomic_dec(&xfs_Gqm->qm_totaldquots); +} + +/* + * This is what a 'fresh' dquot inside a dquot chunk looks like on disk. + */ +STATIC void +xfs_qm_dqinit_core( + xfs_dqid_t id, + uint type, + xfs_dqblk_t *d) +{ + /* + * Caller has zero'd the entire dquot 'chunk' already. + */ + d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); + d->dd_diskdq.d_version = XFS_DQUOT_VERSION; + d->dd_diskdq.d_id = cpu_to_be32(id); + d->dd_diskdq.d_flags = type; +} + +/* + * If default limits are in force, push them into the dquot now. + * We overwrite the dquot limits only if they are zero and this + * is not the root dquot. + */ +void +xfs_qm_adjust_dqlimits( + xfs_mount_t *mp, + xfs_disk_dquot_t *d) +{ + xfs_quotainfo_t *q = mp->m_quotainfo; + + ASSERT(d->d_id); + + if (q->qi_bsoftlimit && !d->d_blk_softlimit) + d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit); + if (q->qi_bhardlimit && !d->d_blk_hardlimit) + d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit); + if (q->qi_isoftlimit && !d->d_ino_softlimit) + d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit); + if (q->qi_ihardlimit && !d->d_ino_hardlimit) + d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit); + if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit) + d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit); + if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit) + d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit); +} + +/* + * Check the limits and timers of a dquot and start or reset timers + * if necessary. + * This gets called even when quota enforcement is OFF, which makes our + * life a little less complicated. (We just don't reject any quota + * reservations in that case, when enforcement is off). + * We also return 0 as the values of the timers in Q_GETQUOTA calls, when + * enforcement's off. + * In contrast, warnings are a little different in that they don't + * 'automatically' get started when limits get exceeded. They do + * get reset to zero, however, when we find the count to be under + * the soft limit (they are only ever set non-zero via userspace). + */ +void +xfs_qm_adjust_dqtimers( + xfs_mount_t *mp, + xfs_disk_dquot_t *d) +{ + ASSERT(d->d_id); + +#ifdef DEBUG + if (d->d_blk_hardlimit) + ASSERT(be64_to_cpu(d->d_blk_softlimit) <= + be64_to_cpu(d->d_blk_hardlimit)); + if (d->d_ino_hardlimit) + ASSERT(be64_to_cpu(d->d_ino_softlimit) <= + be64_to_cpu(d->d_ino_hardlimit)); + if (d->d_rtb_hardlimit) + ASSERT(be64_to_cpu(d->d_rtb_softlimit) <= + be64_to_cpu(d->d_rtb_hardlimit)); +#endif + + if (!d->d_btimer) { + if ((d->d_blk_softlimit && + (be64_to_cpu(d->d_bcount) >= + be64_to_cpu(d->d_blk_softlimit))) || + (d->d_blk_hardlimit && + (be64_to_cpu(d->d_bcount) >= + be64_to_cpu(d->d_blk_hardlimit)))) { + d->d_btimer = cpu_to_be32(get_seconds() + + mp->m_quotainfo->qi_btimelimit); + } else { + d->d_bwarns = 0; + } + } else { + if ((!d->d_blk_softlimit || + (be64_to_cpu(d->d_bcount) < + be64_to_cpu(d->d_blk_softlimit))) && + (!d->d_blk_hardlimit || + (be64_to_cpu(d->d_bcount) < + be64_to_cpu(d->d_blk_hardlimit)))) { + d->d_btimer = 0; + } + } + + if (!d->d_itimer) { + if ((d->d_ino_softlimit && + (be64_to_cpu(d->d_icount) >= + be64_to_cpu(d->d_ino_softlimit))) || + (d->d_ino_hardlimit && + (be64_to_cpu(d->d_icount) >= + be64_to_cpu(d->d_ino_hardlimit)))) { + d->d_itimer = cpu_to_be32(get_seconds() + + mp->m_quotainfo->qi_itimelimit); + } else { + d->d_iwarns = 0; + } + } else { + if ((!d->d_ino_softlimit || + (be64_to_cpu(d->d_icount) < + be64_to_cpu(d->d_ino_softlimit))) && + (!d->d_ino_hardlimit || + (be64_to_cpu(d->d_icount) < + be64_to_cpu(d->d_ino_hardlimit)))) { + d->d_itimer = 0; + } + } + + if (!d->d_rtbtimer) { + if ((d->d_rtb_softlimit && + (be64_to_cpu(d->d_rtbcount) >= + be64_to_cpu(d->d_rtb_softlimit))) || + (d->d_rtb_hardlimit && + (be64_to_cpu(d->d_rtbcount) >= + be64_to_cpu(d->d_rtb_hardlimit)))) { + d->d_rtbtimer = cpu_to_be32(get_seconds() + + mp->m_quotainfo->qi_rtbtimelimit); + } else { + d->d_rtbwarns = 0; + } + } else { + if ((!d->d_rtb_softlimit || + (be64_to_cpu(d->d_rtbcount) < + be64_to_cpu(d->d_rtb_softlimit))) && + (!d->d_rtb_hardlimit || + (be64_to_cpu(d->d_rtbcount) < + be64_to_cpu(d->d_rtb_hardlimit)))) { + d->d_rtbtimer = 0; + } + } +} + +/* + * initialize a buffer full of dquots and log the whole thing + */ +STATIC void +xfs_qm_init_dquot_blk( + xfs_trans_t *tp, + xfs_mount_t *mp, + xfs_dqid_t id, + uint type, + xfs_buf_t *bp) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + xfs_dqblk_t *d; + int curid, i; + + ASSERT(tp); + ASSERT(xfs_buf_islocked(bp)); + + d = bp->b_addr; + + /* + * ID of the first dquot in the block - id's are zero based. + */ + curid = id - (id % q->qi_dqperchunk); + ASSERT(curid >= 0); + memset(d, 0, BBTOB(q->qi_dqchunklen)); + for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) + xfs_qm_dqinit_core(curid, type, d); + xfs_trans_dquot_buf(tp, bp, + (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF : + ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF : + XFS_BLF_GDQUOT_BUF))); + xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); +} + + + +/* + * Allocate a block and fill it with dquots. + * This is called when the bmapi finds a hole. + */ +STATIC int +xfs_qm_dqalloc( + xfs_trans_t **tpp, + xfs_mount_t *mp, + xfs_dquot_t *dqp, + xfs_inode_t *quotip, + xfs_fileoff_t offset_fsb, + xfs_buf_t **O_bpp) +{ + xfs_fsblock_t firstblock; + xfs_bmap_free_t flist; + xfs_bmbt_irec_t map; + int nmaps, error, committed; + xfs_buf_t *bp; + xfs_trans_t *tp = *tpp; + + ASSERT(tp != NULL); + + trace_xfs_dqalloc(dqp); + + /* + * Initialize the bmap freelist prior to calling bmapi code. + */ + xfs_bmap_init(&flist, &firstblock); + xfs_ilock(quotip, XFS_ILOCK_EXCL); + /* + * Return if this type of quotas is turned off while we didn't + * have an inode lock + */ + if (XFS_IS_THIS_QUOTA_OFF(dqp)) { + xfs_iunlock(quotip, XFS_ILOCK_EXCL); + return (ESRCH); + } + + xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL); + nmaps = 1; + if ((error = xfs_bmapi(tp, quotip, + offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB, + XFS_BMAPI_METADATA | XFS_BMAPI_WRITE, + &firstblock, + XFS_QM_DQALLOC_SPACE_RES(mp), + &map, &nmaps, &flist))) { + goto error0; + } + ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); + ASSERT(nmaps == 1); + ASSERT((map.br_startblock != DELAYSTARTBLOCK) && + (map.br_startblock != HOLESTARTBLOCK)); + + /* + * Keep track of the blkno to save a lookup later + */ + dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); + + /* now we can just get the buffer (there's nothing to read yet) */ + bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, + dqp->q_blkno, + mp->m_quotainfo->qi_dqchunklen, + 0); + if (!bp || (error = xfs_buf_geterror(bp))) + goto error1; + /* + * Make a chunk of dquots out of this buffer and log + * the entire thing. + */ + xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id), + dqp->dq_flags & XFS_DQ_ALLTYPES, bp); + + /* + * xfs_bmap_finish() may commit the current transaction and + * start a second transaction if the freelist is not empty. + * + * Since we still want to modify this buffer, we need to + * ensure that the buffer is not released on commit of + * the first transaction and ensure the buffer is added to the + * second transaction. + * + * If there is only one transaction then don't stop the buffer + * from being released when it commits later on. + */ + + xfs_trans_bhold(tp, bp); + + if ((error = xfs_bmap_finish(tpp, &flist, &committed))) { + goto error1; + } + + if (committed) { + tp = *tpp; + xfs_trans_bjoin(tp, bp); + } else { + xfs_trans_bhold_release(tp, bp); + } + + *O_bpp = bp; + return 0; + + error1: + xfs_bmap_cancel(&flist); + error0: + xfs_iunlock(quotip, XFS_ILOCK_EXCL); + + return (error); +} + +/* + * Maps a dquot to the buffer containing its on-disk version. + * This returns a ptr to the buffer containing the on-disk dquot + * in the bpp param, and a ptr to the on-disk dquot within that buffer + */ +STATIC int +xfs_qm_dqtobp( + xfs_trans_t **tpp, + xfs_dquot_t *dqp, + xfs_disk_dquot_t **O_ddpp, + xfs_buf_t **O_bpp, + uint flags) +{ + xfs_bmbt_irec_t map; + int nmaps = 1, error; + xfs_buf_t *bp; + xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp); + xfs_mount_t *mp = dqp->q_mount; + xfs_disk_dquot_t *ddq; + xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); + xfs_trans_t *tp = (tpp ? *tpp : NULL); + + dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; + + xfs_ilock(quotip, XFS_ILOCK_SHARED); + if (XFS_IS_THIS_QUOTA_OFF(dqp)) { + /* + * Return if this type of quotas is turned off while we + * didn't have the quota inode lock. + */ + xfs_iunlock(quotip, XFS_ILOCK_SHARED); + return ESRCH; + } + + /* + * Find the block map; no allocations yet + */ + error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, + XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, + NULL, 0, &map, &nmaps, NULL); + + xfs_iunlock(quotip, XFS_ILOCK_SHARED); + if (error) + return error; + + ASSERT(nmaps == 1); + ASSERT(map.br_blockcount == 1); + + /* + * Offset of dquot in the (fixed sized) dquot chunk. + */ + dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * + sizeof(xfs_dqblk_t); + + ASSERT(map.br_startblock != DELAYSTARTBLOCK); + if (map.br_startblock == HOLESTARTBLOCK) { + /* + * We don't allocate unless we're asked to + */ + if (!(flags & XFS_QMOPT_DQALLOC)) + return ENOENT; + + ASSERT(tp); + error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, + dqp->q_fileoffset, &bp); + if (error) + return error; + tp = *tpp; + } else { + trace_xfs_dqtobp_read(dqp); + + /* + * store the blkno etc so that we don't have to do the + * mapping all the time + */ + dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); + + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, + dqp->q_blkno, + mp->m_quotainfo->qi_dqchunklen, + 0, &bp); + if (error || !bp) + return XFS_ERROR(error); + } + + ASSERT(xfs_buf_islocked(bp)); + + /* + * calculate the location of the dquot inside the buffer. + */ + ddq = bp->b_addr + dqp->q_bufoffset; + + /* + * A simple sanity check in case we got a corrupted dquot... + */ + error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES, + flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN), + "dqtobp"); + if (error) { + if (!(flags & XFS_QMOPT_DQREPAIR)) { + xfs_trans_brelse(tp, bp); + return XFS_ERROR(EIO); + } + } + + *O_bpp = bp; + *O_ddpp = ddq; + + return (0); +} + + +/* + * Read in the ondisk dquot using dqtobp() then copy it to an incore version, + * and release the buffer immediately. + * + */ +/* ARGSUSED */ +STATIC int +xfs_qm_dqread( + xfs_trans_t **tpp, + xfs_dqid_t id, + xfs_dquot_t *dqp, /* dquot to get filled in */ + uint flags) +{ + xfs_disk_dquot_t *ddqp; + xfs_buf_t *bp; + int error; + xfs_trans_t *tp; + + ASSERT(tpp); + + trace_xfs_dqread(dqp); + + /* + * get a pointer to the on-disk dquot and the buffer containing it + * dqp already knows its own type (GROUP/USER). + */ + if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) { + return (error); + } + tp = *tpp; + + /* copy everything from disk dquot to the incore dquot */ + memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); + ASSERT(be32_to_cpu(dqp->q_core.d_id) == id); + xfs_qm_dquot_logitem_init(dqp); + + /* + * Reservation counters are defined as reservation plus current usage + * to avoid having to add every time. + */ + dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); + dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); + dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount); + + /* Mark the buf so that this will stay incore a little longer */ + XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF); + + /* + * We got the buffer with a xfs_trans_read_buf() (in dqtobp()) + * So we need to release with xfs_trans_brelse(). + * The strategy here is identical to that of inodes; we lock + * the dquot in xfs_qm_dqget() before making it accessible to + * others. This is because dquots, like inodes, need a good level of + * concurrency, and we don't want to take locks on the entire buffers + * for dquot accesses. + * Note also that the dquot buffer may even be dirty at this point, if + * this particular dquot was repaired. We still aren't afraid to + * brelse it because we have the changes incore. + */ + ASSERT(xfs_buf_islocked(bp)); + xfs_trans_brelse(tp, bp); + + return (error); +} + + +/* + * allocate an incore dquot from the kernel heap, + * and fill its core with quota information kept on disk. + * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk + * if it wasn't already allocated. + */ +STATIC int +xfs_qm_idtodq( + xfs_mount_t *mp, + xfs_dqid_t id, /* gid or uid, depending on type */ + uint type, /* UDQUOT or GDQUOT */ + uint flags, /* DQALLOC, DQREPAIR */ + xfs_dquot_t **O_dqpp)/* OUT : incore dquot, not locked */ +{ + xfs_dquot_t *dqp; + int error; + xfs_trans_t *tp; + int cancelflags=0; + + dqp = xfs_qm_dqinit(mp, id, type); + tp = NULL; + if (flags & XFS_QMOPT_DQALLOC) { + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); + error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp), + XFS_WRITE_LOG_RES(mp) + + BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + + 128, + 0, + XFS_TRANS_PERM_LOG_RES, + XFS_WRITE_LOG_COUNT); + if (error) { + cancelflags = 0; + goto error0; + } + cancelflags = XFS_TRANS_RELEASE_LOG_RES; + } + + /* + * Read it from disk; xfs_dqread() takes care of + * all the necessary initialization of dquot's fields (locks, etc) + */ + if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) { + /* + * This can happen if quotas got turned off (ESRCH), + * or if the dquot didn't exist on disk and we ask to + * allocate (ENOENT). + */ + trace_xfs_dqread_fail(dqp); + cancelflags |= XFS_TRANS_ABORT; + goto error0; + } + if (tp) { + if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) + goto error1; + } + + *O_dqpp = dqp; + return (0); + + error0: + ASSERT(error); + if (tp) + xfs_trans_cancel(tp, cancelflags); + error1: + xfs_qm_dqdestroy(dqp); + *O_dqpp = NULL; + return (error); +} + +/* + * Lookup a dquot in the incore dquot hashtable. We keep two separate + * hashtables for user and group dquots; and, these are global tables + * inside the XQM, not per-filesystem tables. + * The hash chain must be locked by caller, and it is left locked + * on return. Returning dquot is locked. + */ +STATIC int +xfs_qm_dqlookup( + xfs_mount_t *mp, + xfs_dqid_t id, + xfs_dqhash_t *qh, + xfs_dquot_t **O_dqpp) +{ + xfs_dquot_t *dqp; + uint flist_locked; + + ASSERT(mutex_is_locked(&qh->qh_lock)); + + flist_locked = B_FALSE; + + /* + * Traverse the hashchain looking for a match + */ + list_for_each_entry(dqp, &qh->qh_list, q_hashlist) { + /* + * We already have the hashlock. We don't need the + * dqlock to look at the id field of the dquot, since the + * id can't be modified without the hashlock anyway. + */ + if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) { + trace_xfs_dqlookup_found(dqp); + + /* + * All in core dquots must be on the dqlist of mp + */ + ASSERT(!list_empty(&dqp->q_mplist)); + + xfs_dqlock(dqp); + if (dqp->q_nrefs == 0) { + ASSERT(!list_empty(&dqp->q_freelist)); + if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) { + trace_xfs_dqlookup_want(dqp); + + /* + * We may have raced with dqreclaim_one() + * (and lost). So, flag that we don't + * want the dquot to be reclaimed. + */ + dqp->dq_flags |= XFS_DQ_WANT; + xfs_dqunlock(dqp); + mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); + xfs_dqlock(dqp); + dqp->dq_flags &= ~(XFS_DQ_WANT); + } + flist_locked = B_TRUE; + } + + /* + * id couldn't have changed; we had the hashlock all + * along + */ + ASSERT(be32_to_cpu(dqp->q_core.d_id) == id); + + if (flist_locked) { + if (dqp->q_nrefs != 0) { + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); + flist_locked = B_FALSE; + } else { + /* take it off the freelist */ + trace_xfs_dqlookup_freelist(dqp); + list_del_init(&dqp->q_freelist); + xfs_Gqm->qm_dqfrlist_cnt--; + } + } + + XFS_DQHOLD(dqp); + + if (flist_locked) + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); + /* + * move the dquot to the front of the hashchain + */ + ASSERT(mutex_is_locked(&qh->qh_lock)); + list_move(&dqp->q_hashlist, &qh->qh_list); + trace_xfs_dqlookup_done(dqp); + *O_dqpp = dqp; + return 0; + } + } + + *O_dqpp = NULL; + ASSERT(mutex_is_locked(&qh->qh_lock)); + return (1); +} + +/* + * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a + * a locked dquot, doing an allocation (if requested) as needed. + * When both an inode and an id are given, the inode's id takes precedence. + * That is, if the id changes while we don't hold the ilock inside this + * function, the new dquot is returned, not necessarily the one requested + * in the id argument. + */ +int +xfs_qm_dqget( + xfs_mount_t *mp, + xfs_inode_t *ip, /* locked inode (optional) */ + xfs_dqid_t id, /* uid/projid/gid depending on type */ + uint type, /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */ + uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */ + xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ +{ + xfs_dquot_t *dqp; + xfs_dqhash_t *h; + uint version; + int error; + + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || + (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) || + (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { + return (ESRCH); + } + h = XFS_DQ_HASH(mp, id, type); + +#ifdef DEBUG + if (xfs_do_dqerror) { + if ((xfs_dqerror_target == mp->m_ddev_targp) && + (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { + xfs_debug(mp, "Returning error in dqget"); + return (EIO); + } + } +#endif + + again: + +#ifdef DEBUG + ASSERT(type == XFS_DQ_USER || + type == XFS_DQ_PROJ || + type == XFS_DQ_GROUP); + if (ip) { + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + if (type == XFS_DQ_USER) + ASSERT(ip->i_udquot == NULL); + else + ASSERT(ip->i_gdquot == NULL); + } +#endif + mutex_lock(&h->qh_lock); + + /* + * Look in the cache (hashtable). + * The chain is kept locked during lookup. + */ + if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) { + XQM_STATS_INC(xqmstats.xs_qm_dqcachehits); + /* + * The dquot was found, moved to the front of the chain, + * taken off the freelist if it was on it, and locked + * at this point. Just unlock the hashchain and return. + */ + ASSERT(*O_dqpp); + ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp)); + mutex_unlock(&h->qh_lock); + trace_xfs_dqget_hit(*O_dqpp); + return (0); /* success */ + } + XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses); + + /* + * Dquot cache miss. We don't want to keep the inode lock across + * a (potential) disk read. Also we don't want to deal with the lock + * ordering between quotainode and this inode. OTOH, dropping the inode + * lock here means dealing with a chown that can happen before + * we re-acquire the lock. + */ + if (ip) + xfs_iunlock(ip, XFS_ILOCK_EXCL); + /* + * Save the hashchain version stamp, and unlock the chain, so that + * we don't keep the lock across a disk read + */ + version = h->qh_version; + mutex_unlock(&h->qh_lock); + + /* + * Allocate the dquot on the kernel heap, and read the ondisk + * portion off the disk. Also, do all the necessary initialization + * This can return ENOENT if dquot didn't exist on disk and we didn't + * ask it to allocate; ESRCH if quotas got turned off suddenly. + */ + if ((error = xfs_qm_idtodq(mp, id, type, + flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR| + XFS_QMOPT_DOWARN), + &dqp))) { + if (ip) + xfs_ilock(ip, XFS_ILOCK_EXCL); + return (error); + } + + /* + * See if this is mount code calling to look at the overall quota limits + * which are stored in the id == 0 user or group's dquot. + * Since we may not have done a quotacheck by this point, just return + * the dquot without attaching it to any hashtables, lists, etc, or even + * taking a reference. + * The caller must dqdestroy this once done. + */ + if (flags & XFS_QMOPT_DQSUSER) { + ASSERT(id == 0); + ASSERT(! ip); + goto dqret; + } + + /* + * Dquot lock comes after hashlock in the lock ordering + */ + if (ip) { + xfs_ilock(ip, XFS_ILOCK_EXCL); + + /* + * A dquot could be attached to this inode by now, since + * we had dropped the ilock. + */ + if (type == XFS_DQ_USER) { + if (!XFS_IS_UQUOTA_ON(mp)) { + /* inode stays locked on return */ + xfs_qm_dqdestroy(dqp); + return XFS_ERROR(ESRCH); + } + if (ip->i_udquot) { + xfs_qm_dqdestroy(dqp); + dqp = ip->i_udquot; + xfs_dqlock(dqp); + goto dqret; + } + } else { + if (!XFS_IS_OQUOTA_ON(mp)) { + /* inode stays locked on return */ + xfs_qm_dqdestroy(dqp); + return XFS_ERROR(ESRCH); + } + if (ip->i_gdquot) { + xfs_qm_dqdestroy(dqp); + dqp = ip->i_gdquot; + xfs_dqlock(dqp); + goto dqret; + } + } + } + + /* + * Hashlock comes after ilock in lock order + */ + mutex_lock(&h->qh_lock); + if (version != h->qh_version) { + xfs_dquot_t *tmpdqp; + /* + * Now, see if somebody else put the dquot in the + * hashtable before us. This can happen because we didn't + * keep the hashchain lock. We don't have to worry about + * lock order between the two dquots here since dqp isn't + * on any findable lists yet. + */ + if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) { + /* + * Duplicate found. Just throw away the new dquot + * and start over. + */ + xfs_qm_dqput(tmpdqp); + mutex_unlock(&h->qh_lock); + xfs_qm_dqdestroy(dqp); + XQM_STATS_INC(xqmstats.xs_qm_dquot_dups); + goto again; + } + } + + /* + * Put the dquot at the beginning of the hash-chain and mp's list + * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock .. + */ + ASSERT(mutex_is_locked(&h->qh_lock)); + dqp->q_hash = h; + list_add(&dqp->q_hashlist, &h->qh_list); + h->qh_version++; + + /* + * Attach this dquot to this filesystem's list of all dquots, + * kept inside the mount structure in m_quotainfo field + */ + mutex_lock(&mp->m_quotainfo->qi_dqlist_lock); + + /* + * We return a locked dquot to the caller, with a reference taken + */ + xfs_dqlock(dqp); + dqp->q_nrefs = 1; + + list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist); + mp->m_quotainfo->qi_dquots++; + mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); + mutex_unlock(&h->qh_lock); + dqret: + ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); + trace_xfs_dqget_miss(dqp); + *O_dqpp = dqp; + return (0); +} + + +/* + * Release a reference to the dquot (decrement ref-count) + * and unlock it. If there is a group quota attached to this + * dquot, carefully release that too without tripping over + * deadlocks'n'stuff. + */ +void +xfs_qm_dqput( + xfs_dquot_t *dqp) +{ + xfs_dquot_t *gdqp; + + ASSERT(dqp->q_nrefs > 0); + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + + trace_xfs_dqput(dqp); + + if (dqp->q_nrefs != 1) { + dqp->q_nrefs--; + xfs_dqunlock(dqp); + return; + } + + /* + * drop the dqlock and acquire the freelist and dqlock + * in the right order; but try to get it out-of-order first + */ + if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) { + trace_xfs_dqput_wait(dqp); + xfs_dqunlock(dqp); + mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); + xfs_dqlock(dqp); + } + + while (1) { + gdqp = NULL; + + /* We can't depend on nrefs being == 1 here */ + if (--dqp->q_nrefs == 0) { + trace_xfs_dqput_free(dqp); + + list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); + xfs_Gqm->qm_dqfrlist_cnt++; + + /* + * If we just added a udquot to the freelist, then + * we want to release the gdquot reference that + * it (probably) has. Otherwise it'll keep the + * gdquot from getting reclaimed. + */ + if ((gdqp = dqp->q_gdquot)) { + /* + * Avoid a recursive dqput call + */ + xfs_dqlock(gdqp); + dqp->q_gdquot = NULL; + } + } + xfs_dqunlock(dqp); + + /* + * If we had a group quota inside the user quota as a hint, + * release it now. + */ + if (! gdqp) + break; + dqp = gdqp; + } + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); +} + +/* + * Release a dquot. Flush it if dirty, then dqput() it. + * dquot must not be locked. + */ +void +xfs_qm_dqrele( + xfs_dquot_t *dqp) +{ + if (!dqp) + return; + + trace_xfs_dqrele(dqp); + + xfs_dqlock(dqp); + /* + * We don't care to flush it if the dquot is dirty here. + * That will create stutters that we want to avoid. + * Instead we do a delayed write when we try to reclaim + * a dirty dquot. Also xfs_sync will take part of the burden... + */ + xfs_qm_dqput(dqp); +} + +/* + * This is the dquot flushing I/O completion routine. It is called + * from interrupt level when the buffer containing the dquot is + * flushed to disk. It is responsible for removing the dquot logitem + * from the AIL if it has not been re-logged, and unlocking the dquot's + * flush lock. This behavior is very similar to that of inodes.. + */ +STATIC void +xfs_qm_dqflush_done( + struct xfs_buf *bp, + struct xfs_log_item *lip) +{ + xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip; + xfs_dquot_t *dqp = qip->qli_dquot; + struct xfs_ail *ailp = lip->li_ailp; + + /* + * We only want to pull the item from the AIL if its + * location in the log has not changed since we started the flush. + * Thus, we only bother if the dquot's lsn has + * not changed. First we check the lsn outside the lock + * since it's cheaper, and then we recheck while + * holding the lock before removing the dquot from the AIL. + */ + if ((lip->li_flags & XFS_LI_IN_AIL) && + lip->li_lsn == qip->qli_flush_lsn) { + + /* xfs_trans_ail_delete() drops the AIL lock. */ + spin_lock(&ailp->xa_lock); + if (lip->li_lsn == qip->qli_flush_lsn) + xfs_trans_ail_delete(ailp, lip); + else + spin_unlock(&ailp->xa_lock); + } + + /* + * Release the dq's flush lock since we're done with it. + */ + xfs_dqfunlock(dqp); +} + +/* + * Write a modified dquot to disk. + * The dquot must be locked and the flush lock too taken by caller. + * The flush lock will not be unlocked until the dquot reaches the disk, + * but the dquot is free to be unlocked and modified by the caller + * in the interim. Dquot is still locked on return. This behavior is + * identical to that of inodes. + */ +int +xfs_qm_dqflush( + xfs_dquot_t *dqp, + uint flags) +{ + struct xfs_mount *mp = dqp->q_mount; + struct xfs_buf *bp; + struct xfs_disk_dquot *ddqp; + int error; + + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + ASSERT(!completion_done(&dqp->q_flush)); + + trace_xfs_dqflush(dqp); + + /* + * If not dirty, or it's pinned and we are not supposed to block, nada. + */ + if (!XFS_DQ_IS_DIRTY(dqp) || + (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) { + xfs_dqfunlock(dqp); + return 0; + } + xfs_qm_dqunpin_wait(dqp); + + /* + * This may have been unpinned because the filesystem is shutting + * down forcibly. If that's the case we must not write this dquot + * to disk, because the log record didn't make it to disk! + */ + if (XFS_FORCED_SHUTDOWN(mp)) { + dqp->dq_flags &= ~XFS_DQ_DIRTY; + xfs_dqfunlock(dqp); + return XFS_ERROR(EIO); + } + + /* + * Get the buffer containing the on-disk dquot + */ + error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, + mp->m_quotainfo->qi_dqchunklen, 0, &bp); + if (error) { + ASSERT(error != ENOENT); + xfs_dqfunlock(dqp); + return error; + } + + /* + * Calculate the location of the dquot inside the buffer. + */ + ddqp = bp->b_addr + dqp->q_bufoffset; + + /* + * A simple sanity check in case we got a corrupted dquot.. + */ + error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, + XFS_QMOPT_DOWARN, "dqflush (incore copy)"); + if (error) { + xfs_buf_relse(bp); + xfs_dqfunlock(dqp); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return XFS_ERROR(EIO); + } + + /* This is the only portion of data that needs to persist */ + memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t)); + + /* + * Clear the dirty field and remember the flush lsn for later use. + */ + dqp->dq_flags &= ~XFS_DQ_DIRTY; + + xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, + &dqp->q_logitem.qli_item.li_lsn); + + /* + * Attach an iodone routine so that we can remove this dquot from the + * AIL and release the flush lock once the dquot is synced to disk. + */ + xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done, + &dqp->q_logitem.qli_item); + + /* + * If the buffer is pinned then push on the log so we won't + * get stuck waiting in the write for too long. + */ + if (xfs_buf_ispinned(bp)) { + trace_xfs_dqflush_force(dqp); + xfs_log_force(mp, 0); + } + + if (flags & SYNC_WAIT) + error = xfs_bwrite(mp, bp); + else + xfs_bdwrite(mp, bp); + + trace_xfs_dqflush_done(dqp); + + /* + * dqp is still locked, but caller is free to unlock it now. + */ + return error; + +} + +int +xfs_qm_dqlock_nowait( + xfs_dquot_t *dqp) +{ + return mutex_trylock(&dqp->q_qlock); +} + +void +xfs_dqlock( + xfs_dquot_t *dqp) +{ + mutex_lock(&dqp->q_qlock); +} + +void +xfs_dqunlock( + xfs_dquot_t *dqp) +{ + mutex_unlock(&(dqp->q_qlock)); + if (dqp->q_logitem.qli_dquot == dqp) { + /* Once was dqp->q_mount, but might just have been cleared */ + xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp, + (xfs_log_item_t*)&(dqp->q_logitem)); + } +} + + +void +xfs_dqunlock_nonotify( + xfs_dquot_t *dqp) +{ + mutex_unlock(&(dqp->q_qlock)); +} + +/* + * Lock two xfs_dquot structures. + * + * To avoid deadlocks we always lock the quota structure with + * the lowerd id first. + */ +void +xfs_dqlock2( + xfs_dquot_t *d1, + xfs_dquot_t *d2) +{ + if (d1 && d2) { + ASSERT(d1 != d2); + if (be32_to_cpu(d1->q_core.d_id) > + be32_to_cpu(d2->q_core.d_id)) { + mutex_lock(&d2->q_qlock); + mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED); + } else { + mutex_lock(&d1->q_qlock); + mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED); + } + } else if (d1) { + mutex_lock(&d1->q_qlock); + } else if (d2) { + mutex_lock(&d2->q_qlock); + } +} + + +/* + * Take a dquot out of the mount's dqlist as well as the hashlist. + * This is called via unmount as well as quotaoff, and the purge + * will always succeed unless there are soft (temp) references + * outstanding. + * + * This returns 0 if it was purged, 1 if it wasn't. It's not an error code + * that we're returning! XXXsup - not cool. + */ +/* ARGSUSED */ +int +xfs_qm_dqpurge( + xfs_dquot_t *dqp) +{ + xfs_dqhash_t *qh = dqp->q_hash; + xfs_mount_t *mp = dqp->q_mount; + + ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock)); + ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock)); + + xfs_dqlock(dqp); + /* + * We really can't afford to purge a dquot that is + * referenced, because these are hard refs. + * It shouldn't happen in general because we went thru _all_ inodes in + * dqrele_all_inodes before calling this and didn't let the mountlock go. + * However it is possible that we have dquots with temporary + * references that are not attached to an inode. e.g. see xfs_setattr(). + */ + if (dqp->q_nrefs != 0) { + xfs_dqunlock(dqp); + mutex_unlock(&dqp->q_hash->qh_lock); + return (1); + } + + ASSERT(!list_empty(&dqp->q_freelist)); + + /* + * If we're turning off quotas, we have to make sure that, for + * example, we don't delete quota disk blocks while dquots are + * in the process of getting written to those disk blocks. + * This dquot might well be on AIL, and we can't leave it there + * if we're turning off quotas. Basically, we need this flush + * lock, and are willing to block on it. + */ + if (!xfs_dqflock_nowait(dqp)) { + /* + * Block on the flush lock after nudging dquot buffer, + * if it is incore. + */ + xfs_qm_dqflock_pushbuf_wait(dqp); + } + + /* + * XXXIf we're turning this type of quotas off, we don't care + * about the dirty metadata sitting in this dquot. OTOH, if + * we're unmounting, we do care, so we flush it and wait. + */ + if (XFS_DQ_IS_DIRTY(dqp)) { + int error; + + /* dqflush unlocks dqflock */ + /* + * Given that dqpurge is a very rare occurrence, it is OK + * that we're holding the hashlist and mplist locks + * across the disk write. But, ... XXXsup + * + * We don't care about getting disk errors here. We need + * to purge this dquot anyway, so we go ahead regardless. + */ + error = xfs_qm_dqflush(dqp, SYNC_WAIT); + if (error) + xfs_warn(mp, "%s: dquot %p flush failed", + __func__, dqp); + xfs_dqflock(dqp); + } + ASSERT(atomic_read(&dqp->q_pincount) == 0); + ASSERT(XFS_FORCED_SHUTDOWN(mp) || + !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); + + list_del_init(&dqp->q_hashlist); + qh->qh_version++; + list_del_init(&dqp->q_mplist); + mp->m_quotainfo->qi_dqreclaims++; + mp->m_quotainfo->qi_dquots--; + /* + * XXX Move this to the front of the freelist, if we can get the + * freelist lock. + */ + ASSERT(!list_empty(&dqp->q_freelist)); + + dqp->q_mount = NULL; + dqp->q_hash = NULL; + dqp->dq_flags = XFS_DQ_INACTIVE; + memset(&dqp->q_core, 0, sizeof(dqp->q_core)); + xfs_dqfunlock(dqp); + xfs_dqunlock(dqp); + mutex_unlock(&qh->qh_lock); + return (0); +} + + +/* + * Give the buffer a little push if it is incore and + * wait on the flush lock. + */ +void +xfs_qm_dqflock_pushbuf_wait( + xfs_dquot_t *dqp) +{ + xfs_mount_t *mp = dqp->q_mount; + xfs_buf_t *bp; + + /* + * Check to see if the dquot has been flushed delayed + * write. If so, grab its buffer and send it + * out immediately. We'll be able to acquire + * the flush lock when the I/O completes. + */ + bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno, + mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); + if (!bp) + goto out_lock; + + if (XFS_BUF_ISDELAYWRITE(bp)) { + if (xfs_buf_ispinned(bp)) + xfs_log_force(mp, 0); + xfs_buf_delwri_promote(bp); + wake_up_process(bp->b_target->bt_task); + } + xfs_buf_relse(bp); +out_lock: + xfs_dqflock(dqp); +} diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h new file mode 100644 index 0000000..34b7e94 --- /dev/null +++ b/fs/xfs/xfs_dquot.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DQUOT_H__ +#define __XFS_DQUOT_H__ + +/* + * Dquots are structures that hold quota information about a user or a group, + * much like inodes are for files. In fact, dquots share many characteristics + * with inodes. However, dquots can also be a centralized resource, relative + * to a collection of inodes. In this respect, dquots share some characteristics + * of the superblock. + * XFS dquots exploit both those in its algorithms. They make every attempt + * to not be a bottleneck when quotas are on and have minimal impact, if any, + * when quotas are off. + */ + +/* + * The hash chain headers (hash buckets) + */ +typedef struct xfs_dqhash { + struct list_head qh_list; + struct mutex qh_lock; + uint qh_version; /* ever increasing version */ + uint qh_nelems; /* number of dquots on the list */ +} xfs_dqhash_t; + +struct xfs_mount; +struct xfs_trans; + +/* + * The incore dquot structure + */ +typedef struct xfs_dquot { + uint dq_flags; /* various flags (XFS_DQ_*) */ + struct list_head q_freelist; /* global free list of dquots */ + struct list_head q_mplist; /* mount's list of dquots */ + struct list_head q_hashlist; /* gloabl hash list of dquots */ + xfs_dqhash_t *q_hash; /* the hashchain header */ + struct xfs_mount*q_mount; /* filesystem this relates to */ + struct xfs_trans*q_transp; /* trans this belongs to currently */ + uint q_nrefs; /* # active refs from inodes */ + xfs_daddr_t q_blkno; /* blkno of dquot buffer */ + int q_bufoffset; /* off of dq in buffer (# dquots) */ + xfs_fileoff_t q_fileoffset; /* offset in quotas file */ + + struct xfs_dquot*q_gdquot; /* group dquot, hint only */ + xfs_disk_dquot_t q_core; /* actual usage & quotas */ + xfs_dq_logitem_t q_logitem; /* dquot log item */ + xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */ + xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */ + xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ + struct mutex q_qlock; /* quota lock */ + struct completion q_flush; /* flush completion queue */ + atomic_t q_pincount; /* dquot pin count */ + wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ +} xfs_dquot_t; + +/* + * Lock hierarchy for q_qlock: + * XFS_QLOCK_NORMAL is the implicit default, + * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2 + */ +enum { + XFS_QLOCK_NORMAL = 0, + XFS_QLOCK_NESTED, +}; + +#define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++) + +/* + * Manage the q_flush completion queue embedded in the dquot. This completion + * queue synchronizes processes attempting to flush the in-core dquot back to + * disk. + */ +static inline void xfs_dqflock(xfs_dquot_t *dqp) +{ + wait_for_completion(&dqp->q_flush); +} + +static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp) +{ + return try_wait_for_completion(&dqp->q_flush); +} + +static inline void xfs_dqfunlock(xfs_dquot_t *dqp) +{ + complete(&dqp->q_flush); +} + +#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) +#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) +#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) +#define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ) +#define XFS_QM_ISGDQ(dqp) ((dqp)->dq_flags & XFS_DQ_GROUP) +#define XFS_DQ_TO_QINF(dqp) ((dqp)->q_mount->m_quotainfo) +#define XFS_DQ_TO_QIP(dqp) (XFS_QM_ISUDQ(dqp) ? \ + XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \ + XFS_DQ_TO_QINF(dqp)->qi_gquotaip) + +#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \ + (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ + (XFS_IS_OQUOTA_ON((d)->q_mount)))) + +extern void xfs_qm_dqdestroy(xfs_dquot_t *); +extern int xfs_qm_dqflush(xfs_dquot_t *, uint); +extern int xfs_qm_dqpurge(xfs_dquot_t *); +extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); +extern int xfs_qm_dqlock_nowait(xfs_dquot_t *); +extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp); +extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, + xfs_disk_dquot_t *); +extern void xfs_qm_adjust_dqlimits(xfs_mount_t *, + xfs_disk_dquot_t *); +extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, + xfs_dqid_t, uint, uint, xfs_dquot_t **); +extern void xfs_qm_dqput(xfs_dquot_t *); +extern void xfs_dqlock(xfs_dquot_t *); +extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *); +extern void xfs_dqunlock(xfs_dquot_t *); +extern void xfs_dqunlock_nonotify(xfs_dquot_t *); + +#endif /* __XFS_DQUOT_H__ */ diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c new file mode 100644 index 0000000..9e0e2fa --- /dev/null +++ b/fs/xfs/xfs_dquot_item.c @@ -0,0 +1,529 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_trans_priv.h" +#include "xfs_qm.h" + +static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip) +{ + return container_of(lip, struct xfs_dq_logitem, qli_item); +} + +/* + * returns the number of iovecs needed to log the given dquot item. + */ +STATIC uint +xfs_qm_dquot_logitem_size( + struct xfs_log_item *lip) +{ + /* + * we need only two iovecs, one for the format, one for the real thing + */ + return 2; +} + +/* + * fills in the vector of log iovecs for the given dquot log item. + */ +STATIC void +xfs_qm_dquot_logitem_format( + struct xfs_log_item *lip, + struct xfs_log_iovec *logvec) +{ + struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); + + logvec->i_addr = &qlip->qli_format; + logvec->i_len = sizeof(xfs_dq_logformat_t); + logvec->i_type = XLOG_REG_TYPE_QFORMAT; + logvec++; + logvec->i_addr = &qlip->qli_dquot->q_core; + logvec->i_len = sizeof(xfs_disk_dquot_t); + logvec->i_type = XLOG_REG_TYPE_DQUOT; + + ASSERT(2 == lip->li_desc->lid_size); + qlip->qli_format.qlf_size = 2; + +} + +/* + * Increment the pin count of the given dquot. + */ +STATIC void +xfs_qm_dquot_logitem_pin( + struct xfs_log_item *lip) +{ + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; + + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + atomic_inc(&dqp->q_pincount); +} + +/* + * Decrement the pin count of the given dquot, and wake up + * anyone in xfs_dqwait_unpin() if the count goes to 0. The + * dquot must have been previously pinned with a call to + * xfs_qm_dquot_logitem_pin(). + */ +STATIC void +xfs_qm_dquot_logitem_unpin( + struct xfs_log_item *lip, + int remove) +{ + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; + + ASSERT(atomic_read(&dqp->q_pincount) > 0); + if (atomic_dec_and_test(&dqp->q_pincount)) + wake_up(&dqp->q_pinwait); +} + +/* + * Given the logitem, this writes the corresponding dquot entry to disk + * asynchronously. This is called with the dquot entry securely locked; + * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot + * at the end. + */ +STATIC void +xfs_qm_dquot_logitem_push( + struct xfs_log_item *lip) +{ + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; + int error; + + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + ASSERT(!completion_done(&dqp->q_flush)); + + /* + * Since we were able to lock the dquot's flush lock and + * we found it on the AIL, the dquot must be dirty. This + * is because the dquot is removed from the AIL while still + * holding the flush lock in xfs_dqflush_done(). Thus, if + * we found it in the AIL and were able to obtain the flush + * lock without sleeping, then there must not have been + * anyone in the process of flushing the dquot. + */ + error = xfs_qm_dqflush(dqp, 0); + if (error) + xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p", + __func__, error, dqp); + xfs_dqunlock(dqp); +} + +STATIC xfs_lsn_t +xfs_qm_dquot_logitem_committed( + struct xfs_log_item *lip, + xfs_lsn_t lsn) +{ + /* + * We always re-log the entire dquot when it becomes dirty, + * so, the latest copy _is_ the only one that matters. + */ + return lsn; +} + +/* + * This is called to wait for the given dquot to be unpinned. + * Most of these pin/unpin routines are plagiarized from inode code. + */ +void +xfs_qm_dqunpin_wait( + struct xfs_dquot *dqp) +{ + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + if (atomic_read(&dqp->q_pincount) == 0) + return; + + /* + * Give the log a push so we don't wait here too long. + */ + xfs_log_force(dqp->q_mount, 0); + wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); +} + +/* + * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that + * the dquot is locked by us, but the flush lock isn't. So, here we are + * going to see if the relevant dquot buffer is incore, waiting on DELWRI. + * If so, we want to push it out to help us take this item off the AIL as soon + * as possible. + * + * We must not be holding the AIL lock at this point. Calling incore() to + * search the buffer cache can be a time consuming thing, and AIL lock is a + * spinlock. + */ +STATIC void +xfs_qm_dquot_logitem_pushbuf( + struct xfs_log_item *lip) +{ + struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); + struct xfs_dquot *dqp = qlip->qli_dquot; + struct xfs_buf *bp; + + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + + /* + * If flushlock isn't locked anymore, chances are that the + * inode flush completed and the inode was taken off the AIL. + * So, just get out. + */ + if (completion_done(&dqp->q_flush) || + !(lip->li_flags & XFS_LI_IN_AIL)) { + xfs_dqunlock(dqp); + return; + } + + bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno, + dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); + xfs_dqunlock(dqp); + if (!bp) + return; + if (XFS_BUF_ISDELAYWRITE(bp)) + xfs_buf_delwri_promote(bp); + xfs_buf_relse(bp); +} + +/* + * This is called to attempt to lock the dquot associated with this + * dquot log item. Don't sleep on the dquot lock or the flush lock. + * If the flush lock is already held, indicating that the dquot has + * been or is in the process of being flushed, then see if we can + * find the dquot's buffer in the buffer cache without sleeping. If + * we can and it is marked delayed write, then we want to send it out. + * We delay doing so until the push routine, though, to avoid sleeping + * in any device strategy routines. + */ +STATIC uint +xfs_qm_dquot_logitem_trylock( + struct xfs_log_item *lip) +{ + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; + + if (atomic_read(&dqp->q_pincount) > 0) + return XFS_ITEM_PINNED; + + if (!xfs_qm_dqlock_nowait(dqp)) + return XFS_ITEM_LOCKED; + + if (!xfs_dqflock_nowait(dqp)) { + /* + * dquot has already been flushed to the backing buffer, + * leave it locked, pushbuf routine will unlock it. + */ + return XFS_ITEM_PUSHBUF; + } + + ASSERT(lip->li_flags & XFS_LI_IN_AIL); + return XFS_ITEM_SUCCESS; +} + +/* + * Unlock the dquot associated with the log item. + * Clear the fields of the dquot and dquot log item that + * are specific to the current transaction. If the + * hold flags is set, do not unlock the dquot. + */ +STATIC void +xfs_qm_dquot_logitem_unlock( + struct xfs_log_item *lip) +{ + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; + + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + + /* + * Clear the transaction pointer in the dquot + */ + dqp->q_transp = NULL; + + /* + * dquots are never 'held' from getting unlocked at the end of + * a transaction. Their locking and unlocking is hidden inside the + * transaction layer, within trans_commit. Hence, no LI_HOLD flag + * for the logitem. + */ + xfs_dqunlock(dqp); +} + +/* + * this needs to stamp an lsn into the dquot, I think. + * rpc's that look at user dquot's would then have to + * push on the dependency recorded in the dquot + */ +STATIC void +xfs_qm_dquot_logitem_committing( + struct xfs_log_item *lip, + xfs_lsn_t lsn) +{ +} + +/* + * This is the ops vector for dquots + */ +static struct xfs_item_ops xfs_dquot_item_ops = { + .iop_size = xfs_qm_dquot_logitem_size, + .iop_format = xfs_qm_dquot_logitem_format, + .iop_pin = xfs_qm_dquot_logitem_pin, + .iop_unpin = xfs_qm_dquot_logitem_unpin, + .iop_trylock = xfs_qm_dquot_logitem_trylock, + .iop_unlock = xfs_qm_dquot_logitem_unlock, + .iop_committed = xfs_qm_dquot_logitem_committed, + .iop_push = xfs_qm_dquot_logitem_push, + .iop_pushbuf = xfs_qm_dquot_logitem_pushbuf, + .iop_committing = xfs_qm_dquot_logitem_committing +}; + +/* + * Initialize the dquot log item for a newly allocated dquot. + * The dquot isn't locked at this point, but it isn't on any of the lists + * either, so we don't care. + */ +void +xfs_qm_dquot_logitem_init( + struct xfs_dquot *dqp) +{ + struct xfs_dq_logitem *lp = &dqp->q_logitem; + + xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT, + &xfs_dquot_item_ops); + lp->qli_dquot = dqp; + lp->qli_format.qlf_type = XFS_LI_DQUOT; + lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id); + lp->qli_format.qlf_blkno = dqp->q_blkno; + lp->qli_format.qlf_len = 1; + /* + * This is just the offset of this dquot within its buffer + * (which is currently 1 FSB and probably won't change). + * Hence 32 bits for this offset should be just fine. + * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t)) + * here, and recompute it at recovery time. + */ + lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset; +} + +/*------------------ QUOTAOFF LOG ITEMS -------------------*/ + +static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip) +{ + return container_of(lip, struct xfs_qoff_logitem, qql_item); +} + + +/* + * This returns the number of iovecs needed to log the given quotaoff item. + * We only need 1 iovec for an quotaoff item. It just logs the + * quotaoff_log_format structure. + */ +STATIC uint +xfs_qm_qoff_logitem_size( + struct xfs_log_item *lip) +{ + return 1; +} + +/* + * This is called to fill in the vector of log iovecs for the + * given quotaoff log item. We use only 1 iovec, and we point that + * at the quotaoff_log_format structure embedded in the quotaoff item. + * It is at this point that we assert that all of the extent + * slots in the quotaoff item have been filled. + */ +STATIC void +xfs_qm_qoff_logitem_format( + struct xfs_log_item *lip, + struct xfs_log_iovec *log_vector) +{ + struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip); + + ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF); + + log_vector->i_addr = &qflip->qql_format; + log_vector->i_len = sizeof(xfs_qoff_logitem_t); + log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF; + qflip->qql_format.qf_size = 1; +} + +/* + * Pinning has no meaning for an quotaoff item, so just return. + */ +STATIC void +xfs_qm_qoff_logitem_pin( + struct xfs_log_item *lip) +{ +} + +/* + * Since pinning has no meaning for an quotaoff item, unpinning does + * not either. + */ +STATIC void +xfs_qm_qoff_logitem_unpin( + struct xfs_log_item *lip, + int remove) +{ +} + +/* + * Quotaoff items have no locking, so just return success. + */ +STATIC uint +xfs_qm_qoff_logitem_trylock( + struct xfs_log_item *lip) +{ + return XFS_ITEM_LOCKED; +} + +/* + * Quotaoff items have no locking or pushing, so return failure + * so that the caller doesn't bother with us. + */ +STATIC void +xfs_qm_qoff_logitem_unlock( + struct xfs_log_item *lip) +{ +} + +/* + * The quotaoff-start-item is logged only once and cannot be moved in the log, + * so simply return the lsn at which it's been logged. + */ +STATIC xfs_lsn_t +xfs_qm_qoff_logitem_committed( + struct xfs_log_item *lip, + xfs_lsn_t lsn) +{ + return lsn; +} + +/* + * There isn't much you can do to push on an quotaoff item. It is simply + * stuck waiting for the log to be flushed to disk. + */ +STATIC void +xfs_qm_qoff_logitem_push( + struct xfs_log_item *lip) +{ +} + + +STATIC xfs_lsn_t +xfs_qm_qoffend_logitem_committed( + struct xfs_log_item *lip, + xfs_lsn_t lsn) +{ + struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip); + struct xfs_qoff_logitem *qfs = qfe->qql_start_lip; + struct xfs_ail *ailp = qfs->qql_item.li_ailp; + + /* + * Delete the qoff-start logitem from the AIL. + * xfs_trans_ail_delete() drops the AIL lock. + */ + spin_lock(&ailp->xa_lock); + xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs); + + kmem_free(qfs); + kmem_free(qfe); + return (xfs_lsn_t)-1; +} + +/* + * XXX rcc - don't know quite what to do with this. I think we can + * just ignore it. The only time that isn't the case is if we allow + * the client to somehow see that quotas have been turned off in which + * we can't allow that to get back until the quotaoff hits the disk. + * So how would that happen? Also, do we need different routines for + * quotaoff start and quotaoff end? I suspect the answer is yes but + * to be sure, I need to look at the recovery code and see how quota off + * recovery is handled (do we roll forward or back or do something else). + * If we roll forwards or backwards, then we need two separate routines, + * one that does nothing and one that stamps in the lsn that matters + * (truly makes the quotaoff irrevocable). If we do something else, + * then maybe we don't need two. + */ +STATIC void +xfs_qm_qoff_logitem_committing( + struct xfs_log_item *lip, + xfs_lsn_t commit_lsn) +{ +} + +static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { + .iop_size = xfs_qm_qoff_logitem_size, + .iop_format = xfs_qm_qoff_logitem_format, + .iop_pin = xfs_qm_qoff_logitem_pin, + .iop_unpin = xfs_qm_qoff_logitem_unpin, + .iop_trylock = xfs_qm_qoff_logitem_trylock, + .iop_unlock = xfs_qm_qoff_logitem_unlock, + .iop_committed = xfs_qm_qoffend_logitem_committed, + .iop_push = xfs_qm_qoff_logitem_push, + .iop_committing = xfs_qm_qoff_logitem_committing +}; + +/* + * This is the ops vector shared by all quotaoff-start log items. + */ +static struct xfs_item_ops xfs_qm_qoff_logitem_ops = { + .iop_size = xfs_qm_qoff_logitem_size, + .iop_format = xfs_qm_qoff_logitem_format, + .iop_pin = xfs_qm_qoff_logitem_pin, + .iop_unpin = xfs_qm_qoff_logitem_unpin, + .iop_trylock = xfs_qm_qoff_logitem_trylock, + .iop_unlock = xfs_qm_qoff_logitem_unlock, + .iop_committed = xfs_qm_qoff_logitem_committed, + .iop_push = xfs_qm_qoff_logitem_push, + .iop_committing = xfs_qm_qoff_logitem_committing +}; + +/* + * Allocate and initialize an quotaoff item of the correct quota type(s). + */ +struct xfs_qoff_logitem * +xfs_qm_qoff_logitem_init( + struct xfs_mount *mp, + struct xfs_qoff_logitem *start, + uint flags) +{ + struct xfs_qoff_logitem *qf; + + qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP); + + xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ? + &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops); + qf->qql_item.li_mountp = mp; + qf->qql_format.qf_type = XFS_LI_QUOTAOFF; + qf->qql_format.qf_flags = flags; + qf->qql_start_lip = start; + return qf; +} diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h new file mode 100644 index 0000000..5acae2a --- /dev/null +++ b/fs/xfs/xfs_dquot_item.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DQUOT_ITEM_H__ +#define __XFS_DQUOT_ITEM_H__ + +struct xfs_dquot; +struct xfs_trans; +struct xfs_mount; +struct xfs_qoff_logitem; + +typedef struct xfs_dq_logitem { + xfs_log_item_t qli_item; /* common portion */ + struct xfs_dquot *qli_dquot; /* dquot ptr */ + xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ + xfs_dq_logformat_t qli_format; /* logged structure */ +} xfs_dq_logitem_t; + +typedef struct xfs_qoff_logitem { + xfs_log_item_t qql_item; /* common portion */ + struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ + xfs_qoff_logformat_t qql_format; /* logged structure */ +} xfs_qoff_logitem_t; + + +extern void xfs_qm_dquot_logitem_init(struct xfs_dquot *); +extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *, + struct xfs_qoff_logitem *, uint); +extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *, + struct xfs_qoff_logitem *, uint); +extern void xfs_trans_log_quotaoff_item(struct xfs_trans *, + struct xfs_qoff_logitem *); + +#endif /* __XFS_DQUOT_ITEM_H__ */ diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c new file mode 100644 index 0000000..75e5d32 --- /dev/null +++ b/fs/xfs/xfs_export.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2004-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_types.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_mount.h" +#include "xfs_export.h" +#include "xfs_vnodeops.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_inode_item.h" +#include "xfs_trace.h" + +/* + * Note that we only accept fileids which are long enough rather than allow + * the parent generation number to default to zero. XFS considers zero a + * valid generation number not an invalid/wildcard value. + */ +static int xfs_fileid_length(int fileid_type) +{ + switch (fileid_type) { + case FILEID_INO32_GEN: + return 2; + case FILEID_INO32_GEN_PARENT: + return 4; + case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: + return 3; + case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: + return 6; + } + return 255; /* invalid */ +} + +STATIC int +xfs_fs_encode_fh( + struct dentry *dentry, + __u32 *fh, + int *max_len, + int connectable) +{ + struct fid *fid = (struct fid *)fh; + struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fh; + struct inode *inode = dentry->d_inode; + int fileid_type; + int len; + + /* Directories don't need their parent encoded, they have ".." */ + if (S_ISDIR(inode->i_mode) || !connectable) + fileid_type = FILEID_INO32_GEN; + else + fileid_type = FILEID_INO32_GEN_PARENT; + + /* + * If the the filesystem may contain 64bit inode numbers, we need + * to use larger file handles that can represent them. + * + * While we only allocate inodes that do not fit into 32 bits any + * large enough filesystem may contain them, thus the slightly + * confusing looking conditional below. + */ + if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) || + (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES)) + fileid_type |= XFS_FILEID_TYPE_64FLAG; + + /* + * Only encode if there is enough space given. In practice + * this means we can't export a filesystem with 64bit inodes + * over NFSv2 with the subtree_check export option; the other + * seven combinations work. The real answer is "don't use v2". + */ + len = xfs_fileid_length(fileid_type); + if (*max_len < len) { + *max_len = len; + return 255; + } + *max_len = len; + + switch (fileid_type) { + case FILEID_INO32_GEN_PARENT: + spin_lock(&dentry->d_lock); + fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino; + fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation; + spin_unlock(&dentry->d_lock); + /*FALLTHRU*/ + case FILEID_INO32_GEN: + fid->i32.ino = inode->i_ino; + fid->i32.gen = inode->i_generation; + break; + case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: + spin_lock(&dentry->d_lock); + fid64->parent_ino = dentry->d_parent->d_inode->i_ino; + fid64->parent_gen = dentry->d_parent->d_inode->i_generation; + spin_unlock(&dentry->d_lock); + /*FALLTHRU*/ + case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: + fid64->ino = inode->i_ino; + fid64->gen = inode->i_generation; + break; + } + + return fileid_type; +} + +STATIC struct inode * +xfs_nfs_get_inode( + struct super_block *sb, + u64 ino, + u32 generation) + { + xfs_mount_t *mp = XFS_M(sb); + xfs_inode_t *ip; + int error; + + /* + * NFS can sometimes send requests for ino 0. Fail them gracefully. + */ + if (ino == 0) + return ERR_PTR(-ESTALE); + + /* + * The XFS_IGET_UNTRUSTED means that an invalid inode number is just + * fine and not an indication of a corrupted filesystem as clients can + * send invalid file handles and we have to handle it gracefully.. + */ + error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip); + if (error) { + /* + * EINVAL means the inode cluster doesn't exist anymore. + * This implies the filehandle is stale, so we should + * translate it here. + * We don't use ESTALE directly down the chain to not + * confuse applications using bulkstat that expect EINVAL. + */ + if (error == EINVAL || error == ENOENT) + error = ESTALE; + return ERR_PTR(-error); + } + + if (ip->i_d.di_gen != generation) { + IRELE(ip); + return ERR_PTR(-ESTALE); + } + + return VFS_I(ip); +} + +STATIC struct dentry * +xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fileid_type) +{ + struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; + struct inode *inode = NULL; + + if (fh_len < xfs_fileid_length(fileid_type)) + return NULL; + + switch (fileid_type) { + case FILEID_INO32_GEN_PARENT: + case FILEID_INO32_GEN: + inode = xfs_nfs_get_inode(sb, fid->i32.ino, fid->i32.gen); + break; + case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: + case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: + inode = xfs_nfs_get_inode(sb, fid64->ino, fid64->gen); + break; + } + + return d_obtain_alias(inode); +} + +STATIC struct dentry * +xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fileid_type) +{ + struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; + struct inode *inode = NULL; + + switch (fileid_type) { + case FILEID_INO32_GEN_PARENT: + inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino, + fid->i32.parent_gen); + break; + case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: + inode = xfs_nfs_get_inode(sb, fid64->parent_ino, + fid64->parent_gen); + break; + } + + return d_obtain_alias(inode); +} + +STATIC struct dentry * +xfs_fs_get_parent( + struct dentry *child) +{ + int error; + struct xfs_inode *cip; + + error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL); + if (unlikely(error)) + return ERR_PTR(-error); + + return d_obtain_alias(VFS_I(cip)); +} + +STATIC int +xfs_fs_nfs_commit_metadata( + struct inode *inode) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + int error = 0; + + xfs_ilock(ip, XFS_ILOCK_SHARED); + if (xfs_ipincount(ip)) { + error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn, + XFS_LOG_SYNC, NULL); + } + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + return error; +} + +const struct export_operations xfs_export_operations = { + .encode_fh = xfs_fs_encode_fh, + .fh_to_dentry = xfs_fs_fh_to_dentry, + .fh_to_parent = xfs_fs_fh_to_parent, + .get_parent = xfs_fs_get_parent, + .commit_metadata = xfs_fs_nfs_commit_metadata, +}; diff --git a/fs/xfs/xfs_export.h b/fs/xfs/xfs_export.h new file mode 100644 index 0000000..3272b6a --- /dev/null +++ b/fs/xfs/xfs_export.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_EXPORT_H__ +#define __XFS_EXPORT_H__ + +/* + * Common defines for code related to exporting XFS filesystems over NFS. + * + * The NFS fileid goes out on the wire as an array of + * 32bit unsigned ints in host order. There are 5 possible + * formats. + * + * (1) fileid_type=0x00 + * (no fileid data; handled by the generic code) + * + * (2) fileid_type=0x01 + * inode-num + * generation + * + * (3) fileid_type=0x02 + * inode-num + * generation + * parent-inode-num + * parent-generation + * + * (4) fileid_type=0x81 + * inode-num-lo32 + * inode-num-hi32 + * generation + * + * (5) fileid_type=0x82 + * inode-num-lo32 + * inode-num-hi32 + * generation + * parent-inode-num-lo32 + * parent-inode-num-hi32 + * parent-generation + * + * Note, the NFS filehandle also includes an fsid portion which + * may have an inode number in it. That number is hardcoded to + * 32bits and there is no way for XFS to intercept it. In + * practice this means when exporting an XFS filesystem with 64bit + * inodes you should either export the mountpoint (rather than + * a subdirectory) or use the "fsid" export option. + */ + +struct xfs_fid64 { + u64 ino; + u32 gen; + u64 parent_ino; + u32 parent_gen; +} __attribute__((packed)); + +/* This flag goes on the wire. Don't play with it. */ +#define XFS_FILEID_TYPE_64FLAG 0x80 /* NFS fileid has 64bit inodes */ + +#endif /* __XFS_EXPORT_H__ */ diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c new file mode 100644 index 0000000..7f7b424 --- /dev/null +++ b/fs/xfs/xfs_file.c @@ -0,0 +1,1096 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_trans.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_inode_item.h" +#include "xfs_bmap.h" +#include "xfs_error.h" +#include "xfs_vnodeops.h" +#include "xfs_da_btree.h" +#include "xfs_ioctl.h" +#include "xfs_trace.h" + +#include +#include + +static const struct vm_operations_struct xfs_file_vm_ops; + +/* + * Locking primitives for read and write IO paths to ensure we consistently use + * and order the inode->i_mutex, ip->i_lock and ip->i_iolock. + */ +static inline void +xfs_rw_ilock( + struct xfs_inode *ip, + int type) +{ + if (type & XFS_IOLOCK_EXCL) + mutex_lock(&VFS_I(ip)->i_mutex); + xfs_ilock(ip, type); +} + +static inline void +xfs_rw_iunlock( + struct xfs_inode *ip, + int type) +{ + xfs_iunlock(ip, type); + if (type & XFS_IOLOCK_EXCL) + mutex_unlock(&VFS_I(ip)->i_mutex); +} + +static inline void +xfs_rw_ilock_demote( + struct xfs_inode *ip, + int type) +{ + xfs_ilock_demote(ip, type); + if (type & XFS_IOLOCK_EXCL) + mutex_unlock(&VFS_I(ip)->i_mutex); +} + +/* + * xfs_iozero + * + * xfs_iozero clears the specified range of buffer supplied, + * and marks all the affected blocks as valid and modified. If + * an affected block is not allocated, it will be allocated. If + * an affected block is not completely overwritten, and is not + * valid before the operation, it will be read from disk before + * being partially zeroed. + */ +STATIC int +xfs_iozero( + struct xfs_inode *ip, /* inode */ + loff_t pos, /* offset in file */ + size_t count) /* size of data to zero */ +{ + struct page *page; + struct address_space *mapping; + int status; + + mapping = VFS_I(ip)->i_mapping; + do { + unsigned offset, bytes; + void *fsdata; + + offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ + bytes = PAGE_CACHE_SIZE - offset; + if (bytes > count) + bytes = count; + + status = pagecache_write_begin(NULL, mapping, pos, bytes, + AOP_FLAG_UNINTERRUPTIBLE, + &page, &fsdata); + if (status) + break; + + zero_user(page, offset, bytes); + + status = pagecache_write_end(NULL, mapping, pos, bytes, bytes, + page, fsdata); + WARN_ON(status <= 0); /* can't return less than zero! */ + pos += bytes; + count -= bytes; + status = 0; + } while (count); + + return (-status); +} + +STATIC int +xfs_file_fsync( + struct file *file, + loff_t start, + loff_t end, + int datasync) +{ + struct inode *inode = file->f_mapping->host; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error = 0; + int log_flushed = 0; + + trace_xfs_file_fsync(ip); + + error = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (error) + return error; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -XFS_ERROR(EIO); + + xfs_iflags_clear(ip, XFS_ITRUNCATED); + + xfs_ilock(ip, XFS_IOLOCK_SHARED); + xfs_ioend_wait(ip); + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + + if (mp->m_flags & XFS_MOUNT_BARRIER) { + /* + * If we have an RT and/or log subvolume we need to make sure + * to flush the write cache the device used for file data + * first. This is to ensure newly written file data make + * it to disk before logging the new inode size in case of + * an extending write. + */ + if (XFS_IS_REALTIME_INODE(ip)) + xfs_blkdev_issue_flush(mp->m_rtdev_targp); + else if (mp->m_logdev_targp != mp->m_ddev_targp) + xfs_blkdev_issue_flush(mp->m_ddev_targp); + } + + /* + * We always need to make sure that the required inode state is safe on + * disk. The inode might be clean but we still might need to force the + * log because of committed transactions that haven't hit the disk yet. + * Likewise, there could be unflushed non-transactional changes to the + * inode core that have to go to disk and this requires us to issue + * a synchronous transaction to capture these changes correctly. + * + * This code relies on the assumption that if the i_update_core field + * of the inode is clear and the inode is unpinned then it is clean + * and no action is required. + */ + xfs_ilock(ip, XFS_ILOCK_SHARED); + + /* + * First check if the VFS inode is marked dirty. All the dirtying + * of non-transactional updates no goes through mark_inode_dirty*, + * which allows us to distinguish beteeen pure timestamp updates + * and i_size updates which need to be caught for fdatasync. + * After that also theck for the dirty state in the XFS inode, which + * might gets cleared when the inode gets written out via the AIL + * or xfs_iflush_cluster. + */ + if (((inode->i_state & I_DIRTY_DATASYNC) || + ((inode->i_state & I_DIRTY_SYNC) && !datasync)) && + ip->i_update_core) { + /* + * Kick off a transaction to log the inode core to get the + * updates. The sync transaction will also force the log. + */ + xfs_iunlock(ip, XFS_ILOCK_SHARED); + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); + error = xfs_trans_reserve(tp, 0, + XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return -error; + } + xfs_ilock(ip, XFS_ILOCK_EXCL); + + /* + * Note - it's possible that we might have pushed ourselves out + * of the way during trans_reserve which would flush the inode. + * But there's no guarantee that the inode buffer has actually + * gone out yet (it's delwri). Plus the buffer could be pinned + * anyway if it's part of an inode in another recent + * transaction. So we play it safe and fire off the + * transaction anyway. + */ + xfs_trans_ijoin(tp, ip); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + xfs_trans_set_sync(tp); + error = _xfs_trans_commit(tp, 0, &log_flushed); + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } else { + /* + * Timestamps/size haven't changed since last inode flush or + * inode transaction commit. That means either nothing got + * written or a transaction committed which caught the updates. + * If the latter happened and the transaction hasn't hit the + * disk yet, the inode will be still be pinned. If it is, + * force the log. + */ + if (xfs_ipincount(ip)) { + error = _xfs_log_force_lsn(mp, + ip->i_itemp->ili_last_lsn, + XFS_LOG_SYNC, &log_flushed); + } + xfs_iunlock(ip, XFS_ILOCK_SHARED); + } + + /* + * If we only have a single device, and the log force about was + * a no-op we might have to flush the data device cache here. + * This can only happen for fdatasync/O_DSYNC if we were overwriting + * an already allocated file and thus do not have any metadata to + * commit. + */ + if ((mp->m_flags & XFS_MOUNT_BARRIER) && + mp->m_logdev_targp == mp->m_ddev_targp && + !XFS_IS_REALTIME_INODE(ip) && + !log_flushed) + xfs_blkdev_issue_flush(mp->m_ddev_targp); + + return -error; +} + +STATIC ssize_t +xfs_file_aio_read( + struct kiocb *iocb, + const struct iovec *iovp, + unsigned long nr_segs, + loff_t pos) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + size_t size = 0; + ssize_t ret = 0; + int ioflags = 0; + xfs_fsize_t n; + unsigned long seg; + + XFS_STATS_INC(xs_read_calls); + + BUG_ON(iocb->ki_pos != pos); + + if (unlikely(file->f_flags & O_DIRECT)) + ioflags |= IO_ISDIRECT; + if (file->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; + + /* START copy & waste from filemap.c */ + for (seg = 0; seg < nr_segs; seg++) { + const struct iovec *iv = &iovp[seg]; + + /* + * If any segment has a negative length, or the cumulative + * length ever wraps negative then return -EINVAL. + */ + size += iv->iov_len; + if (unlikely((ssize_t)(size|iv->iov_len) < 0)) + return XFS_ERROR(-EINVAL); + } + /* END copy & waste from filemap.c */ + + if (unlikely(ioflags & IO_ISDIRECT)) { + xfs_buftarg_t *target = + XFS_IS_REALTIME_INODE(ip) ? + mp->m_rtdev_targp : mp->m_ddev_targp; + if ((iocb->ki_pos & target->bt_smask) || + (size & target->bt_smask)) { + if (iocb->ki_pos == ip->i_size) + return 0; + return -XFS_ERROR(EINVAL); + } + } + + n = XFS_MAXIOFFSET(mp) - iocb->ki_pos; + if (n <= 0 || size == 0) + return 0; + + if (n < size) + size = n; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + if (unlikely(ioflags & IO_ISDIRECT)) { + xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); + + if (inode->i_mapping->nrpages) { + ret = -xfs_flushinval_pages(ip, + (iocb->ki_pos & PAGE_CACHE_MASK), + -1, FI_REMAPF_LOCKED); + if (ret) { + xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); + return ret; + } + } + xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); + } else + xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); + + trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); + + ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos); + if (ret > 0) + XFS_STATS_ADD(xs_read_bytes, ret); + + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); + return ret; +} + +STATIC ssize_t +xfs_file_splice_read( + struct file *infilp, + loff_t *ppos, + struct pipe_inode_info *pipe, + size_t count, + unsigned int flags) +{ + struct xfs_inode *ip = XFS_I(infilp->f_mapping->host); + int ioflags = 0; + ssize_t ret; + + XFS_STATS_INC(xs_read_calls); + + if (infilp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return -EIO; + + xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); + + trace_xfs_file_splice_read(ip, count, *ppos, ioflags); + + ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); + if (ret > 0) + XFS_STATS_ADD(xs_read_bytes, ret); + + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); + return ret; +} + +STATIC void +xfs_aio_write_isize_update( + struct inode *inode, + loff_t *ppos, + ssize_t bytes_written) +{ + struct xfs_inode *ip = XFS_I(inode); + xfs_fsize_t isize = i_size_read(inode); + + if (bytes_written > 0) + XFS_STATS_ADD(xs_write_bytes, bytes_written); + + if (unlikely(bytes_written < 0 && bytes_written != -EFAULT && + *ppos > isize)) + *ppos = isize; + + if (*ppos > ip->i_size) { + xfs_rw_ilock(ip, XFS_ILOCK_EXCL); + if (*ppos > ip->i_size) + ip->i_size = *ppos; + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); + } +} + +/* + * If this was a direct or synchronous I/O that failed (such as ENOSPC) then + * part of the I/O may have been written to disk before the error occurred. In + * this case the on-disk file size may have been adjusted beyond the in-memory + * file size and now needs to be truncated back. + */ +STATIC void +xfs_aio_write_newsize_update( + struct xfs_inode *ip) +{ + if (ip->i_new_size) { + xfs_rw_ilock(ip, XFS_ILOCK_EXCL); + ip->i_new_size = 0; + if (ip->i_d.di_size > ip->i_size) + ip->i_d.di_size = ip->i_size; + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); + } +} + +/* + * xfs_file_splice_write() does not use xfs_rw_ilock() because + * generic_file_splice_write() takes the i_mutex itself. This, in theory, + * couuld cause lock inversions between the aio_write path and the splice path + * if someone is doing concurrent splice(2) based writes and write(2) based + * writes to the same inode. The only real way to fix this is to re-implement + * the generic code here with correct locking orders. + */ +STATIC ssize_t +xfs_file_splice_write( + struct pipe_inode_info *pipe, + struct file *outfilp, + loff_t *ppos, + size_t count, + unsigned int flags) +{ + struct inode *inode = outfilp->f_mapping->host; + struct xfs_inode *ip = XFS_I(inode); + xfs_fsize_t new_size; + int ioflags = 0; + ssize_t ret; + + XFS_STATS_INC(xs_write_calls); + + if (outfilp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return -EIO; + + xfs_ilock(ip, XFS_IOLOCK_EXCL); + + new_size = *ppos + count; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + if (new_size > ip->i_size) + ip->i_new_size = new_size; + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + trace_xfs_file_splice_write(ip, count, *ppos, ioflags); + + ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); + + xfs_aio_write_isize_update(inode, ppos, ret); + xfs_aio_write_newsize_update(ip); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return ret; +} + +/* + * This routine is called to handle zeroing any space in the last + * block of the file that is beyond the EOF. We do this since the + * size is being increased without writing anything to that block + * and we don't want anyone to read the garbage on the disk. + */ +STATIC int /* error (positive) */ +xfs_zero_last_block( + xfs_inode_t *ip, + xfs_fsize_t offset, + xfs_fsize_t isize) +{ + xfs_fileoff_t last_fsb; + xfs_mount_t *mp = ip->i_mount; + int nimaps; + int zero_offset; + int zero_len; + int error = 0; + xfs_bmbt_irec_t imap; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + zero_offset = XFS_B_FSB_OFFSET(mp, isize); + if (zero_offset == 0) { + /* + * There are no extra bytes in the last block on disk to + * zero, so return. + */ + return 0; + } + + last_fsb = XFS_B_TO_FSBT(mp, isize); + nimaps = 1; + error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap, + &nimaps, NULL); + if (error) { + return error; + } + ASSERT(nimaps > 0); + /* + * If the block underlying isize is just a hole, then there + * is nothing to zero. + */ + if (imap.br_startblock == HOLESTARTBLOCK) { + return 0; + } + /* + * Zero the part of the last block beyond the EOF, and write it + * out sync. We need to drop the ilock while we do this so we + * don't deadlock when the buffer cache calls back to us. + */ + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + zero_len = mp->m_sb.sb_blocksize - zero_offset; + if (isize + zero_len > offset) + zero_len = offset - isize; + error = xfs_iozero(ip, isize, zero_len); + + xfs_ilock(ip, XFS_ILOCK_EXCL); + ASSERT(error >= 0); + return error; +} + +/* + * Zero any on disk space between the current EOF and the new, + * larger EOF. This handles the normal case of zeroing the remainder + * of the last block in the file and the unusual case of zeroing blocks + * out beyond the size of the file. This second case only happens + * with fixed size extents and when the system crashes before the inode + * size was updated but after blocks were allocated. If fill is set, + * then any holes in the range are filled and zeroed. If not, the holes + * are left alone as holes. + */ + +int /* error (positive) */ +xfs_zero_eof( + xfs_inode_t *ip, + xfs_off_t offset, /* starting I/O offset */ + xfs_fsize_t isize) /* current inode size */ +{ + xfs_mount_t *mp = ip->i_mount; + xfs_fileoff_t start_zero_fsb; + xfs_fileoff_t end_zero_fsb; + xfs_fileoff_t zero_count_fsb; + xfs_fileoff_t last_fsb; + xfs_fileoff_t zero_off; + xfs_fsize_t zero_len; + int nimaps; + int error = 0; + xfs_bmbt_irec_t imap; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); + ASSERT(offset > isize); + + /* + * First handle zeroing the block on which isize resides. + * We only zero a part of that block so it is handled specially. + */ + error = xfs_zero_last_block(ip, offset, isize); + if (error) { + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); + return error; + } + + /* + * Calculate the range between the new size and the old + * where blocks needing to be zeroed may exist. To get the + * block where the last byte in the file currently resides, + * we need to subtract one from the size and truncate back + * to a block boundary. We subtract 1 in case the size is + * exactly on a block boundary. + */ + last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; + start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); + end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1); + ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb); + if (last_fsb == end_zero_fsb) { + /* + * The size was only incremented on its last block. + * We took care of that above, so just return. + */ + return 0; + } + + ASSERT(start_zero_fsb <= end_zero_fsb); + while (start_zero_fsb <= end_zero_fsb) { + nimaps = 1; + zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; + error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, + 0, NULL, 0, &imap, &nimaps, NULL); + if (error) { + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); + return error; + } + ASSERT(nimaps > 0); + + if (imap.br_state == XFS_EXT_UNWRITTEN || + imap.br_startblock == HOLESTARTBLOCK) { + /* + * This loop handles initializing pages that were + * partially initialized by the code below this + * loop. It basically zeroes the part of the page + * that sits on a hole and sets the page as P_HOLE + * and calls remapf if it is a mapped file. + */ + start_zero_fsb = imap.br_startoff + imap.br_blockcount; + ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); + continue; + } + + /* + * There are blocks we need to zero. + * Drop the inode lock while we're doing the I/O. + * We'll still have the iolock to protect us. + */ + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); + zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); + + if ((zero_off + zero_len) > offset) + zero_len = offset - zero_off; + + error = xfs_iozero(ip, zero_off, zero_len); + if (error) { + goto out_lock; + } + + start_zero_fsb = imap.br_startoff + imap.br_blockcount; + ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); + + xfs_ilock(ip, XFS_ILOCK_EXCL); + } + + return 0; + +out_lock: + xfs_ilock(ip, XFS_ILOCK_EXCL); + ASSERT(error >= 0); + return error; +} + +/* + * Common pre-write limit and setup checks. + * + * Returns with iolock held according to @iolock. + */ +STATIC ssize_t +xfs_file_aio_write_checks( + struct file *file, + loff_t *pos, + size_t *count, + int *iolock) +{ + struct inode *inode = file->f_mapping->host; + struct xfs_inode *ip = XFS_I(inode); + xfs_fsize_t new_size; + int error = 0; + + error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); + if (error) { + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); + *iolock = 0; + return error; + } + + new_size = *pos + *count; + if (new_size > ip->i_size) + ip->i_new_size = new_size; + + if (likely(!(file->f_mode & FMODE_NOCMTIME))) + file_update_time(file); + + /* + * If the offset is beyond the size of the file, we need to zero any + * blocks that fall between the existing EOF and the start of this + * write. + */ + if (*pos > ip->i_size) + error = -xfs_zero_eof(ip, *pos, ip->i_size); + + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); + if (error) + return error; + + /* + * If we're writing the file then make sure to clear the setuid and + * setgid bits if the process is not being run by root. This keeps + * people from modifying setuid and setgid binaries. + */ + return file_remove_suid(file); + +} + +/* + * xfs_file_dio_aio_write - handle direct IO writes + * + * Lock the inode appropriately to prepare for and issue a direct IO write. + * By separating it from the buffered write path we remove all the tricky to + * follow locking changes and looping. + * + * If there are cached pages or we're extending the file, we need IOLOCK_EXCL + * until we're sure the bytes at the new EOF have been zeroed and/or the cached + * pages are flushed out. + * + * In most cases the direct IO writes will be done holding IOLOCK_SHARED + * allowing them to be done in parallel with reads and other direct IO writes. + * However, if the IO is not aligned to filesystem blocks, the direct IO layer + * needs to do sub-block zeroing and that requires serialisation against other + * direct IOs to the same block. In this case we need to serialise the + * submission of the unaligned IOs so that we don't get racing block zeroing in + * the dio layer. To avoid the problem with aio, we also need to wait for + * outstanding IOs to complete so that unwritten extent conversion is completed + * before we try to map the overlapping block. This is currently implemented by + * hitting it with a big hammer (i.e. xfs_ioend_wait()). + * + * Returns with locks held indicated by @iolock and errors indicated by + * negative return values. + */ +STATIC ssize_t +xfs_file_dio_aio_write( + struct kiocb *iocb, + const struct iovec *iovp, + unsigned long nr_segs, + loff_t pos, + size_t ocount, + int *iolock) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + ssize_t ret = 0; + size_t count = ocount; + int unaligned_io = 0; + struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? + mp->m_rtdev_targp : mp->m_ddev_targp; + + *iolock = 0; + if ((pos & target->bt_smask) || (count & target->bt_smask)) + return -XFS_ERROR(EINVAL); + + if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) + unaligned_io = 1; + + if (unaligned_io || mapping->nrpages || pos > ip->i_size) + *iolock = XFS_IOLOCK_EXCL; + else + *iolock = XFS_IOLOCK_SHARED; + xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); + + ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); + if (ret) + return ret; + + if (mapping->nrpages) { + WARN_ON(*iolock != XFS_IOLOCK_EXCL); + ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, + FI_REMAPF_LOCKED); + if (ret) + return ret; + } + + /* + * If we are doing unaligned IO, wait for all other IO to drain, + * otherwise demote the lock if we had to flush cached pages + */ + if (unaligned_io) + xfs_ioend_wait(ip); + else if (*iolock == XFS_IOLOCK_EXCL) { + xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); + *iolock = XFS_IOLOCK_SHARED; + } + + trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); + ret = generic_file_direct_write(iocb, iovp, + &nr_segs, pos, &iocb->ki_pos, count, ocount); + + /* No fallback to buffered IO on errors for XFS. */ + ASSERT(ret < 0 || ret == count); + return ret; +} + +STATIC ssize_t +xfs_file_buffered_aio_write( + struct kiocb *iocb, + const struct iovec *iovp, + unsigned long nr_segs, + loff_t pos, + size_t ocount, + int *iolock) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + struct xfs_inode *ip = XFS_I(inode); + ssize_t ret; + int enospc = 0; + size_t count = ocount; + + *iolock = XFS_IOLOCK_EXCL; + xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); + + ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); + if (ret) + return ret; + + /* We can write back this queue in page reclaim */ + current->backing_dev_info = mapping->backing_dev_info; + +write_retry: + trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); + ret = generic_file_buffered_write(iocb, iovp, nr_segs, + pos, &iocb->ki_pos, count, ret); + /* + * if we just got an ENOSPC, flush the inode now we aren't holding any + * page locks and retry *once* + */ + if (ret == -ENOSPC && !enospc) { + ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE); + if (ret) + return ret; + enospc = 1; + goto write_retry; + } + current->backing_dev_info = NULL; + return ret; +} + +STATIC ssize_t +xfs_file_aio_write( + struct kiocb *iocb, + const struct iovec *iovp, + unsigned long nr_segs, + loff_t pos) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + struct xfs_inode *ip = XFS_I(inode); + ssize_t ret; + int iolock; + size_t ocount = 0; + + XFS_STATS_INC(xs_write_calls); + + BUG_ON(iocb->ki_pos != pos); + + ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); + if (ret) + return ret; + + if (ocount == 0) + return 0; + + xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE); + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return -EIO; + + if (unlikely(file->f_flags & O_DIRECT)) + ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, + ocount, &iolock); + else + ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, + ocount, &iolock); + + xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret); + + if (ret <= 0) + goto out_unlock; + + /* Handle various SYNC-type writes */ + if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { + loff_t end = pos + ret - 1; + int error; + + xfs_rw_iunlock(ip, iolock); + error = xfs_file_fsync(file, pos, end, + (file->f_flags & __O_SYNC) ? 0 : 1); + xfs_rw_ilock(ip, iolock); + if (error) + ret = error; + } + +out_unlock: + xfs_aio_write_newsize_update(ip); + xfs_rw_iunlock(ip, iolock); + return ret; +} + +STATIC long +xfs_file_fallocate( + struct file *file, + int mode, + loff_t offset, + loff_t len) +{ + struct inode *inode = file->f_path.dentry->d_inode; + long error; + loff_t new_size = 0; + xfs_flock64_t bf; + xfs_inode_t *ip = XFS_I(inode); + int cmd = XFS_IOC_RESVSP; + int attr_flags = XFS_ATTR_NOLOCK; + + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + return -EOPNOTSUPP; + + bf.l_whence = 0; + bf.l_start = offset; + bf.l_len = len; + + xfs_ilock(ip, XFS_IOLOCK_EXCL); + + if (mode & FALLOC_FL_PUNCH_HOLE) + cmd = XFS_IOC_UNRESVSP; + + /* check the new inode size is valid before allocating */ + if (!(mode & FALLOC_FL_KEEP_SIZE) && + offset + len > i_size_read(inode)) { + new_size = offset + len; + error = inode_newsize_ok(inode, new_size); + if (error) + goto out_unlock; + } + + if (file->f_flags & O_DSYNC) + attr_flags |= XFS_ATTR_SYNC; + + error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags); + if (error) + goto out_unlock; + + /* Change file size if needed */ + if (new_size) { + struct iattr iattr; + + iattr.ia_valid = ATTR_SIZE; + iattr.ia_size = new_size; + error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK); + } + +out_unlock: + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return error; +} + + +STATIC int +xfs_file_open( + struct inode *inode, + struct file *file) +{ + if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) + return -EFBIG; + if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb))) + return -EIO; + return 0; +} + +STATIC int +xfs_dir_open( + struct inode *inode, + struct file *file) +{ + struct xfs_inode *ip = XFS_I(inode); + int mode; + int error; + + error = xfs_file_open(inode, file); + if (error) + return error; + + /* + * If there are any blocks, read-ahead block 0 as we're almost + * certain to have the next operation be a read there. + */ + mode = xfs_ilock_map_shared(ip); + if (ip->i_d.di_nextents > 0) + xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); + xfs_iunlock(ip, mode); + return 0; +} + +STATIC int +xfs_file_release( + struct inode *inode, + struct file *filp) +{ + return -xfs_release(XFS_I(inode)); +} + +STATIC int +xfs_file_readdir( + struct file *filp, + void *dirent, + filldir_t filldir) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + xfs_inode_t *ip = XFS_I(inode); + int error; + size_t bufsize; + + /* + * The Linux API doesn't pass down the total size of the buffer + * we read into down to the filesystem. With the filldir concept + * it's not needed for correct information, but the XFS dir2 leaf + * code wants an estimate of the buffer size to calculate it's + * readahead window and size the buffers used for mapping to + * physical blocks. + * + * Try to give it an estimate that's good enough, maybe at some + * point we can change the ->readdir prototype to include the + * buffer size. For now we use the current glibc buffer size. + */ + bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size); + + error = xfs_readdir(ip, dirent, bufsize, + (xfs_off_t *)&filp->f_pos, filldir); + if (error) + return -error; + return 0; +} + +STATIC int +xfs_file_mmap( + struct file *filp, + struct vm_area_struct *vma) +{ + vma->vm_ops = &xfs_file_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; + + file_accessed(filp); + return 0; +} + +/* + * mmap()d file has taken write protection fault and is being made + * writable. We can set the page state up correctly for a writable + * page, which means we can do correct delalloc accounting (ENOSPC + * checking!) and unwritten extent mapping. + */ +STATIC int +xfs_vm_page_mkwrite( + struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + return block_page_mkwrite(vma, vmf, xfs_get_blocks); +} + +const struct file_operations xfs_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = xfs_file_aio_read, + .aio_write = xfs_file_aio_write, + .splice_read = xfs_file_splice_read, + .splice_write = xfs_file_splice_write, + .unlocked_ioctl = xfs_file_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = xfs_file_compat_ioctl, +#endif + .mmap = xfs_file_mmap, + .open = xfs_file_open, + .release = xfs_file_release, + .fsync = xfs_file_fsync, + .fallocate = xfs_file_fallocate, +}; + +const struct file_operations xfs_dir_file_operations = { + .open = xfs_dir_open, + .read = generic_read_dir, + .readdir = xfs_file_readdir, + .llseek = generic_file_llseek, + .unlocked_ioctl = xfs_file_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = xfs_file_compat_ioctl, +#endif + .fsync = xfs_file_fsync, +}; + +static const struct vm_operations_struct xfs_file_vm_ops = { + .fault = filemap_fault, + .page_mkwrite = xfs_vm_page_mkwrite, +}; diff --git a/fs/xfs/xfs_fs_subr.c b/fs/xfs/xfs_fs_subr.c new file mode 100644 index 0000000..ed88ed1 --- /dev/null +++ b/fs/xfs/xfs_fs_subr.c @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_vnodeops.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_trace.h" + +/* + * note: all filemap functions return negative error codes. These + * need to be inverted before returning to the xfs core functions. + */ +void +xfs_tosspages( + xfs_inode_t *ip, + xfs_off_t first, + xfs_off_t last, + int fiopt) +{ + /* can't toss partial tail pages, so mask them out */ + last &= ~(PAGE_SIZE - 1); + truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1); +} + +int +xfs_flushinval_pages( + xfs_inode_t *ip, + xfs_off_t first, + xfs_off_t last, + int fiopt) +{ + struct address_space *mapping = VFS_I(ip)->i_mapping; + int ret = 0; + + trace_xfs_pagecache_inval(ip, first, last); + + xfs_iflags_clear(ip, XFS_ITRUNCATED); + ret = filemap_write_and_wait_range(mapping, first, + last == -1 ? LLONG_MAX : last); + if (!ret) + truncate_inode_pages_range(mapping, first, last); + return -ret; +} + +int +xfs_flush_pages( + xfs_inode_t *ip, + xfs_off_t first, + xfs_off_t last, + uint64_t flags, + int fiopt) +{ + struct address_space *mapping = VFS_I(ip)->i_mapping; + int ret = 0; + int ret2; + + xfs_iflags_clear(ip, XFS_ITRUNCATED); + ret = -filemap_fdatawrite_range(mapping, first, + last == -1 ? LLONG_MAX : last); + if (flags & XBF_ASYNC) + return ret; + ret2 = xfs_wait_on_pages(ip, first, last); + if (!ret) + ret = ret2; + return ret; +} + +int +xfs_wait_on_pages( + xfs_inode_t *ip, + xfs_off_t first, + xfs_off_t last) +{ + struct address_space *mapping = VFS_I(ip)->i_mapping; + + if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) { + return -filemap_fdatawait_range(mapping, first, + last == -1 ? ip->i_size - 1 : last); + } + return 0; +} diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c new file mode 100644 index 0000000..76e81cf --- /dev/null +++ b/fs/xfs/xfs_globals.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_sysctl.h" + +/* + * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n, + * other XFS code uses these values. Times are measured in centisecs (i.e. + * 100ths of a second). + */ +xfs_param_t xfs_params = { + /* MIN DFLT MAX */ + .sgid_inherit = { 0, 0, 1 }, + .symlink_mode = { 0, 0, 1 }, + .panic_mask = { 0, 0, 255 }, + .error_level = { 0, 3, 11 }, + .syncd_timer = { 1*100, 30*100, 7200*100}, + .stats_clear = { 0, 0, 1 }, + .inherit_sync = { 0, 1, 1 }, + .inherit_nodump = { 0, 1, 1 }, + .inherit_noatim = { 0, 1, 1 }, + .xfs_buf_timer = { 100/2, 1*100, 30*100 }, + .xfs_buf_age = { 1*100, 15*100, 7200*100}, + .inherit_nosym = { 0, 0, 1 }, + .rotorstep = { 1, 1, 255 }, + .inherit_nodfrg = { 0, 1, 1 }, + .fstrm_timer = { 1, 30*100, 3600*100}, +}; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c new file mode 100644 index 0000000..f7ce7de --- /dev/null +++ b/fs/xfs/xfs_ioctl.c @@ -0,0 +1,1556 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_ioctl.h" +#include "xfs_rtalloc.h" +#include "xfs_itable.h" +#include "xfs_error.h" +#include "xfs_attr.h" +#include "xfs_bmap.h" +#include "xfs_buf_item.h" +#include "xfs_utils.h" +#include "xfs_dfrag.h" +#include "xfs_fsops.h" +#include "xfs_vnodeops.h" +#include "xfs_discard.h" +#include "xfs_quota.h" +#include "xfs_inode_item.h" +#include "xfs_export.h" +#include "xfs_trace.h" + +#include +#include +#include +#include +#include +#include +#include + +/* + * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to + * a file or fs handle. + * + * XFS_IOC_PATH_TO_FSHANDLE + * returns fs handle for a mount point or path within that mount point + * XFS_IOC_FD_TO_HANDLE + * returns full handle for a FD opened in user space + * XFS_IOC_PATH_TO_HANDLE + * returns full handle for a path + */ +int +xfs_find_handle( + unsigned int cmd, + xfs_fsop_handlereq_t *hreq) +{ + int hsize; + xfs_handle_t handle; + struct inode *inode; + struct file *file = NULL; + struct path path; + int error; + struct xfs_inode *ip; + + if (cmd == XFS_IOC_FD_TO_HANDLE) { + file = fget(hreq->fd); + if (!file) + return -EBADF; + inode = file->f_path.dentry->d_inode; + } else { + error = user_lpath((const char __user *)hreq->path, &path); + if (error) + return error; + inode = path.dentry->d_inode; + } + ip = XFS_I(inode); + + /* + * We can only generate handles for inodes residing on a XFS filesystem, + * and only for regular files, directories or symbolic links. + */ + error = -EINVAL; + if (inode->i_sb->s_magic != XFS_SB_MAGIC) + goto out_put; + + error = -EBADF; + if (!S_ISREG(inode->i_mode) && + !S_ISDIR(inode->i_mode) && + !S_ISLNK(inode->i_mode)) + goto out_put; + + + memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t)); + + if (cmd == XFS_IOC_PATH_TO_FSHANDLE) { + /* + * This handle only contains an fsid, zero the rest. + */ + memset(&handle.ha_fid, 0, sizeof(handle.ha_fid)); + hsize = sizeof(xfs_fsid_t); + } else { + int lock_mode; + + lock_mode = xfs_ilock_map_shared(ip); + handle.ha_fid.fid_len = sizeof(xfs_fid_t) - + sizeof(handle.ha_fid.fid_len); + handle.ha_fid.fid_pad = 0; + handle.ha_fid.fid_gen = ip->i_d.di_gen; + handle.ha_fid.fid_ino = ip->i_ino; + xfs_iunlock_map_shared(ip, lock_mode); + + hsize = XFS_HSIZE(handle); + } + + error = -EFAULT; + if (copy_to_user(hreq->ohandle, &handle, hsize) || + copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) + goto out_put; + + error = 0; + + out_put: + if (cmd == XFS_IOC_FD_TO_HANDLE) + fput(file); + else + path_put(&path); + return error; +} + +/* + * No need to do permission checks on the various pathname components + * as the handle operations are privileged. + */ +STATIC int +xfs_handle_acceptable( + void *context, + struct dentry *dentry) +{ + return 1; +} + +/* + * Convert userspace handle data into a dentry. + */ +struct dentry * +xfs_handle_to_dentry( + struct file *parfilp, + void __user *uhandle, + u32 hlen) +{ + xfs_handle_t handle; + struct xfs_fid64 fid; + + /* + * Only allow handle opens under a directory. + */ + if (!S_ISDIR(parfilp->f_path.dentry->d_inode->i_mode)) + return ERR_PTR(-ENOTDIR); + + if (hlen != sizeof(xfs_handle_t)) + return ERR_PTR(-EINVAL); + if (copy_from_user(&handle, uhandle, hlen)) + return ERR_PTR(-EFAULT); + if (handle.ha_fid.fid_len != + sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len)) + return ERR_PTR(-EINVAL); + + memset(&fid, 0, sizeof(struct fid)); + fid.ino = handle.ha_fid.fid_ino; + fid.gen = handle.ha_fid.fid_gen; + + return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3, + FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG, + xfs_handle_acceptable, NULL); +} + +STATIC struct dentry * +xfs_handlereq_to_dentry( + struct file *parfilp, + xfs_fsop_handlereq_t *hreq) +{ + return xfs_handle_to_dentry(parfilp, hreq->ihandle, hreq->ihandlen); +} + +int +xfs_open_by_handle( + struct file *parfilp, + xfs_fsop_handlereq_t *hreq) +{ + const struct cred *cred = current_cred(); + int error; + int fd; + int permflag; + struct file *filp; + struct inode *inode; + struct dentry *dentry; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + + dentry = xfs_handlereq_to_dentry(parfilp, hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + inode = dentry->d_inode; + + /* Restrict xfs_open_by_handle to directories & regular files. */ + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) { + error = -XFS_ERROR(EPERM); + goto out_dput; + } + +#if BITS_PER_LONG != 32 + hreq->oflags |= O_LARGEFILE; +#endif + + /* Put open permission in namei format. */ + permflag = hreq->oflags; + if ((permflag+1) & O_ACCMODE) + permflag++; + if (permflag & O_TRUNC) + permflag |= 2; + + if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && + (permflag & FMODE_WRITE) && IS_APPEND(inode)) { + error = -XFS_ERROR(EPERM); + goto out_dput; + } + + if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) { + error = -XFS_ERROR(EACCES); + goto out_dput; + } + + /* Can't write directories. */ + if (S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) { + error = -XFS_ERROR(EISDIR); + goto out_dput; + } + + fd = get_unused_fd(); + if (fd < 0) { + error = fd; + goto out_dput; + } + + filp = dentry_open(dentry, mntget(parfilp->f_path.mnt), + hreq->oflags, cred); + if (IS_ERR(filp)) { + put_unused_fd(fd); + return PTR_ERR(filp); + } + + if (S_ISREG(inode->i_mode)) { + filp->f_flags |= O_NOATIME; + filp->f_mode |= FMODE_NOCMTIME; + } + + fd_install(fd, filp); + return fd; + + out_dput: + dput(dentry); + return error; +} + +/* + * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's + * unused first argument. + */ +STATIC int +do_readlink( + char __user *buffer, + int buflen, + const char *link) +{ + int len; + + len = PTR_ERR(link); + if (IS_ERR(link)) + goto out; + + len = strlen(link); + if (len > (unsigned) buflen) + len = buflen; + if (copy_to_user(buffer, link, len)) + len = -EFAULT; + out: + return len; +} + + +int +xfs_readlink_by_handle( + struct file *parfilp, + xfs_fsop_handlereq_t *hreq) +{ + struct dentry *dentry; + __u32 olen; + void *link; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + + dentry = xfs_handlereq_to_dentry(parfilp, hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + /* Restrict this handle operation to symlinks only. */ + if (!S_ISLNK(dentry->d_inode->i_mode)) { + error = -XFS_ERROR(EINVAL); + goto out_dput; + } + + if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) { + error = -XFS_ERROR(EFAULT); + goto out_dput; + } + + link = kmalloc(MAXPATHLEN+1, GFP_KERNEL); + if (!link) { + error = -XFS_ERROR(ENOMEM); + goto out_dput; + } + + error = -xfs_readlink(XFS_I(dentry->d_inode), link); + if (error) + goto out_kfree; + error = do_readlink(hreq->ohandle, olen, link); + if (error) + goto out_kfree; + + out_kfree: + kfree(link); + out_dput: + dput(dentry); + return error; +} + +STATIC int +xfs_fssetdm_by_handle( + struct file *parfilp, + void __user *arg) +{ + int error; + struct fsdmidata fsd; + xfs_fsop_setdm_handlereq_t dmhreq; + struct dentry *dentry; + + if (!capable(CAP_MKNOD)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) + return -XFS_ERROR(EFAULT); + + dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { + error = -XFS_ERROR(EPERM); + goto out; + } + + if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) { + error = -XFS_ERROR(EFAULT); + goto out; + } + + error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, + fsd.fsd_dmstate); + + out: + dput(dentry); + return error; +} + +STATIC int +xfs_attrlist_by_handle( + struct file *parfilp, + void __user *arg) +{ + int error = -ENOMEM; + attrlist_cursor_kern_t *cursor; + xfs_fsop_attrlist_handlereq_t al_hreq; + struct dentry *dentry; + char *kbuf; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) + return -XFS_ERROR(EFAULT); + if (al_hreq.buflen > XATTR_LIST_MAX) + return -XFS_ERROR(EINVAL); + + /* + * Reject flags, only allow namespaces. + */ + if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) + return -XFS_ERROR(EINVAL); + + dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL); + if (!kbuf) + goto out_dput; + + cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; + error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, + al_hreq.flags, cursor); + if (error) + goto out_kfree; + + if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen)) + error = -EFAULT; + + out_kfree: + kfree(kbuf); + out_dput: + dput(dentry); + return error; +} + +int +xfs_attrmulti_attr_get( + struct inode *inode, + unsigned char *name, + unsigned char __user *ubuf, + __uint32_t *len, + __uint32_t flags) +{ + unsigned char *kbuf; + int error = EFAULT; + + if (*len > XATTR_SIZE_MAX) + return EINVAL; + kbuf = kmalloc(*len, GFP_KERNEL); + if (!kbuf) + return ENOMEM; + + error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags); + if (error) + goto out_kfree; + + if (copy_to_user(ubuf, kbuf, *len)) + error = EFAULT; + + out_kfree: + kfree(kbuf); + return error; +} + +int +xfs_attrmulti_attr_set( + struct inode *inode, + unsigned char *name, + const unsigned char __user *ubuf, + __uint32_t len, + __uint32_t flags) +{ + unsigned char *kbuf; + int error = EFAULT; + + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + return EPERM; + if (len > XATTR_SIZE_MAX) + return EINVAL; + + kbuf = memdup_user(ubuf, len); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); + + return error; +} + +int +xfs_attrmulti_attr_remove( + struct inode *inode, + unsigned char *name, + __uint32_t flags) +{ + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + return EPERM; + return xfs_attr_remove(XFS_I(inode), name, flags); +} + +STATIC int +xfs_attrmulti_by_handle( + struct file *parfilp, + void __user *arg) +{ + int error; + xfs_attr_multiop_t *ops; + xfs_fsop_attrmulti_handlereq_t am_hreq; + struct dentry *dentry; + unsigned int i, size; + unsigned char *attr_name; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t))) + return -XFS_ERROR(EFAULT); + + /* overflow check */ + if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t)) + return -E2BIG; + + dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + error = E2BIG; + size = am_hreq.opcount * sizeof(xfs_attr_multiop_t); + if (!size || size > 16 * PAGE_SIZE) + goto out_dput; + + ops = memdup_user(am_hreq.ops, size); + if (IS_ERR(ops)) { + error = PTR_ERR(ops); + goto out_dput; + } + + attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); + if (!attr_name) + goto out_kfree_ops; + + error = 0; + for (i = 0; i < am_hreq.opcount; i++) { + ops[i].am_error = strncpy_from_user((char *)attr_name, + ops[i].am_attrname, MAXNAMELEN); + if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) + error = -ERANGE; + if (ops[i].am_error < 0) + break; + + switch (ops[i].am_opcode) { + case ATTR_OP_GET: + ops[i].am_error = xfs_attrmulti_attr_get( + dentry->d_inode, attr_name, + ops[i].am_attrvalue, &ops[i].am_length, + ops[i].am_flags); + break; + case ATTR_OP_SET: + ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); + if (ops[i].am_error) + break; + ops[i].am_error = xfs_attrmulti_attr_set( + dentry->d_inode, attr_name, + ops[i].am_attrvalue, ops[i].am_length, + ops[i].am_flags); + mnt_drop_write(parfilp->f_path.mnt); + break; + case ATTR_OP_REMOVE: + ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); + if (ops[i].am_error) + break; + ops[i].am_error = xfs_attrmulti_attr_remove( + dentry->d_inode, attr_name, + ops[i].am_flags); + mnt_drop_write(parfilp->f_path.mnt); + break; + default: + ops[i].am_error = EINVAL; + } + } + + if (copy_to_user(am_hreq.ops, ops, size)) + error = XFS_ERROR(EFAULT); + + kfree(attr_name); + out_kfree_ops: + kfree(ops); + out_dput: + dput(dentry); + return -error; +} + +int +xfs_ioc_space( + struct xfs_inode *ip, + struct inode *inode, + struct file *filp, + int ioflags, + unsigned int cmd, + xfs_flock64_t *bf) +{ + int attr_flags = 0; + int error; + + /* + * Only allow the sys admin to reserve space unless + * unwritten extents are enabled. + */ + if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) && + !capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + + if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) + return -XFS_ERROR(EPERM); + + if (!(filp->f_mode & FMODE_WRITE)) + return -XFS_ERROR(EBADF); + + if (!S_ISREG(inode->i_mode)) + return -XFS_ERROR(EINVAL); + + if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) + attr_flags |= XFS_ATTR_NONBLOCK; + + if (filp->f_flags & O_DSYNC) + attr_flags |= XFS_ATTR_SYNC; + + if (ioflags & IO_INVIS) + attr_flags |= XFS_ATTR_DMI; + + error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags); + return -error; +} + +STATIC int +xfs_ioc_bulkstat( + xfs_mount_t *mp, + unsigned int cmd, + void __user *arg) +{ + xfs_fsop_bulkreq_t bulkreq; + int count; /* # of records returned */ + xfs_ino_t inlast; /* last inode number */ + int done; + int error; + + /* done = 1 if there are more stats to get and if bulkstat */ + /* should be called again (unused here, but used in dmapi) */ + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -XFS_ERROR(EIO); + + if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) + return -XFS_ERROR(EFAULT); + + if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) + return -XFS_ERROR(EFAULT); + + if ((count = bulkreq.icount) <= 0) + return -XFS_ERROR(EINVAL); + + if (bulkreq.ubuffer == NULL) + return -XFS_ERROR(EINVAL); + + if (cmd == XFS_IOC_FSINUMBERS) + error = xfs_inumbers(mp, &inlast, &count, + bulkreq.ubuffer, xfs_inumbers_fmt); + else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) + error = xfs_bulkstat_single(mp, &inlast, + bulkreq.ubuffer, &done); + else /* XFS_IOC_FSBULKSTAT */ + error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, + sizeof(xfs_bstat_t), bulkreq.ubuffer, + &done); + + if (error) + return -error; + + if (bulkreq.ocount != NULL) { + if (copy_to_user(bulkreq.lastip, &inlast, + sizeof(xfs_ino_t))) + return -XFS_ERROR(EFAULT); + + if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) + return -XFS_ERROR(EFAULT); + } + + return 0; +} + +STATIC int +xfs_ioc_fsgeometry_v1( + xfs_mount_t *mp, + void __user *arg) +{ + xfs_fsop_geom_t fsgeo; + int error; + + error = xfs_fs_geometry(mp, &fsgeo, 3); + if (error) + return -error; + + /* + * Caller should have passed an argument of type + * xfs_fsop_geom_v1_t. This is a proper subset of the + * xfs_fsop_geom_t that xfs_fs_geometry() fills in. + */ + if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t))) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_ioc_fsgeometry( + xfs_mount_t *mp, + void __user *arg) +{ + xfs_fsop_geom_t fsgeo; + int error; + + error = xfs_fs_geometry(mp, &fsgeo, 4); + if (error) + return -error; + + if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) + return -XFS_ERROR(EFAULT); + return 0; +} + +/* + * Linux extended inode flags interface. + */ + +STATIC unsigned int +xfs_merge_ioc_xflags( + unsigned int flags, + unsigned int start) +{ + unsigned int xflags = start; + + if (flags & FS_IMMUTABLE_FL) + xflags |= XFS_XFLAG_IMMUTABLE; + else + xflags &= ~XFS_XFLAG_IMMUTABLE; + if (flags & FS_APPEND_FL) + xflags |= XFS_XFLAG_APPEND; + else + xflags &= ~XFS_XFLAG_APPEND; + if (flags & FS_SYNC_FL) + xflags |= XFS_XFLAG_SYNC; + else + xflags &= ~XFS_XFLAG_SYNC; + if (flags & FS_NOATIME_FL) + xflags |= XFS_XFLAG_NOATIME; + else + xflags &= ~XFS_XFLAG_NOATIME; + if (flags & FS_NODUMP_FL) + xflags |= XFS_XFLAG_NODUMP; + else + xflags &= ~XFS_XFLAG_NODUMP; + + return xflags; +} + +STATIC unsigned int +xfs_di2lxflags( + __uint16_t di_flags) +{ + unsigned int flags = 0; + + if (di_flags & XFS_DIFLAG_IMMUTABLE) + flags |= FS_IMMUTABLE_FL; + if (di_flags & XFS_DIFLAG_APPEND) + flags |= FS_APPEND_FL; + if (di_flags & XFS_DIFLAG_SYNC) + flags |= FS_SYNC_FL; + if (di_flags & XFS_DIFLAG_NOATIME) + flags |= FS_NOATIME_FL; + if (di_flags & XFS_DIFLAG_NODUMP) + flags |= FS_NODUMP_FL; + return flags; +} + +STATIC int +xfs_ioc_fsgetxattr( + xfs_inode_t *ip, + int attr, + void __user *arg) +{ + struct fsxattr fa; + + memset(&fa, 0, sizeof(struct fsxattr)); + + xfs_ilock(ip, XFS_ILOCK_SHARED); + fa.fsx_xflags = xfs_ip2xflags(ip); + fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; + fa.fsx_projid = xfs_get_projid(ip); + + if (attr) { + if (ip->i_afp) { + if (ip->i_afp->if_flags & XFS_IFEXTENTS) + fa.fsx_nextents = ip->i_afp->if_bytes / + sizeof(xfs_bmbt_rec_t); + else + fa.fsx_nextents = ip->i_d.di_anextents; + } else + fa.fsx_nextents = 0; + } else { + if (ip->i_df.if_flags & XFS_IFEXTENTS) + fa.fsx_nextents = ip->i_df.if_bytes / + sizeof(xfs_bmbt_rec_t); + else + fa.fsx_nextents = ip->i_d.di_nextents; + } + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + if (copy_to_user(arg, &fa, sizeof(fa))) + return -EFAULT; + return 0; +} + +STATIC void +xfs_set_diflags( + struct xfs_inode *ip, + unsigned int xflags) +{ + unsigned int di_flags; + + /* can't set PREALLOC this way, just preserve it */ + di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); + if (xflags & XFS_XFLAG_IMMUTABLE) + di_flags |= XFS_DIFLAG_IMMUTABLE; + if (xflags & XFS_XFLAG_APPEND) + di_flags |= XFS_DIFLAG_APPEND; + if (xflags & XFS_XFLAG_SYNC) + di_flags |= XFS_DIFLAG_SYNC; + if (xflags & XFS_XFLAG_NOATIME) + di_flags |= XFS_DIFLAG_NOATIME; + if (xflags & XFS_XFLAG_NODUMP) + di_flags |= XFS_DIFLAG_NODUMP; + if (xflags & XFS_XFLAG_PROJINHERIT) + di_flags |= XFS_DIFLAG_PROJINHERIT; + if (xflags & XFS_XFLAG_NODEFRAG) + di_flags |= XFS_DIFLAG_NODEFRAG; + if (xflags & XFS_XFLAG_FILESTREAM) + di_flags |= XFS_DIFLAG_FILESTREAM; + if (S_ISDIR(ip->i_d.di_mode)) { + if (xflags & XFS_XFLAG_RTINHERIT) + di_flags |= XFS_DIFLAG_RTINHERIT; + if (xflags & XFS_XFLAG_NOSYMLINKS) + di_flags |= XFS_DIFLAG_NOSYMLINKS; + if (xflags & XFS_XFLAG_EXTSZINHERIT) + di_flags |= XFS_DIFLAG_EXTSZINHERIT; + } else if (S_ISREG(ip->i_d.di_mode)) { + if (xflags & XFS_XFLAG_REALTIME) + di_flags |= XFS_DIFLAG_REALTIME; + if (xflags & XFS_XFLAG_EXTSIZE) + di_flags |= XFS_DIFLAG_EXTSIZE; + } + + ip->i_d.di_flags = di_flags; +} + +STATIC void +xfs_diflags_to_linux( + struct xfs_inode *ip) +{ + struct inode *inode = VFS_I(ip); + unsigned int xflags = xfs_ip2xflags(ip); + + if (xflags & XFS_XFLAG_IMMUTABLE) + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; + if (xflags & XFS_XFLAG_APPEND) + inode->i_flags |= S_APPEND; + else + inode->i_flags &= ~S_APPEND; + if (xflags & XFS_XFLAG_SYNC) + inode->i_flags |= S_SYNC; + else + inode->i_flags &= ~S_SYNC; + if (xflags & XFS_XFLAG_NOATIME) + inode->i_flags |= S_NOATIME; + else + inode->i_flags &= ~S_NOATIME; +} + +#define FSX_PROJID 1 +#define FSX_EXTSIZE 2 +#define FSX_XFLAGS 4 +#define FSX_NONBLOCK 8 + +STATIC int +xfs_ioctl_setattr( + xfs_inode_t *ip, + struct fsxattr *fa, + int mask) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + unsigned int lock_flags = 0; + struct xfs_dquot *udqp = NULL; + struct xfs_dquot *gdqp = NULL; + struct xfs_dquot *olddquot = NULL; + int code; + + trace_xfs_ioctl_setattr(ip); + + if (mp->m_flags & XFS_MOUNT_RDONLY) + return XFS_ERROR(EROFS); + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + /* + * Disallow 32bit project ids when projid32bit feature is not enabled. + */ + if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) && + !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) + return XFS_ERROR(EINVAL); + + /* + * If disk quotas is on, we make sure that the dquots do exist on disk, + * before we start any other transactions. Trying to do this later + * is messy. We don't care to take a readlock to look at the ids + * in inode here, because we can't hold it across the trans_reserve. + * If the IDs do change before we take the ilock, we're covered + * because the i_*dquot fields will get updated anyway. + */ + if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) { + code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, + ip->i_d.di_gid, fa->fsx_projid, + XFS_QMOPT_PQUOTA, &udqp, &gdqp); + if (code) + return code; + } + + /* + * For the other attributes, we acquire the inode lock and + * first do an error checking pass. + */ + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); + code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); + if (code) + goto error_return; + + lock_flags = XFS_ILOCK_EXCL; + xfs_ilock(ip, lock_flags); + + /* + * CAP_FOWNER overrides the following restrictions: + * + * The user ID of the calling process must be equal + * to the file owner ID, except in cases where the + * CAP_FSETID capability is applicable. + */ + if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) { + code = XFS_ERROR(EPERM); + goto error_return; + } + + /* + * Do a quota reservation only if projid is actually going to change. + */ + if (mask & FSX_PROJID) { + if (XFS_IS_QUOTA_RUNNING(mp) && + XFS_IS_PQUOTA_ON(mp) && + xfs_get_projid(ip) != fa->fsx_projid) { + ASSERT(tp); + code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, + capable(CAP_FOWNER) ? + XFS_QMOPT_FORCE_RES : 0); + if (code) /* out of quota */ + goto error_return; + } + } + + if (mask & FSX_EXTSIZE) { + /* + * Can't change extent size if any extents are allocated. + */ + if (ip->i_d.di_nextents && + ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != + fa->fsx_extsize)) { + code = XFS_ERROR(EINVAL); /* EFBIG? */ + goto error_return; + } + + /* + * Extent size must be a multiple of the appropriate block + * size, if set at all. It must also be smaller than the + * maximum extent size supported by the filesystem. + * + * Also, for non-realtime files, limit the extent size hint to + * half the size of the AGs in the filesystem so alignment + * doesn't result in extents larger than an AG. + */ + if (fa->fsx_extsize != 0) { + xfs_extlen_t size; + xfs_fsblock_t extsize_fsb; + + extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); + if (extsize_fsb > MAXEXTLEN) { + code = XFS_ERROR(EINVAL); + goto error_return; + } + + if (XFS_IS_REALTIME_INODE(ip) || + ((mask & FSX_XFLAGS) && + (fa->fsx_xflags & XFS_XFLAG_REALTIME))) { + size = mp->m_sb.sb_rextsize << + mp->m_sb.sb_blocklog; + } else { + size = mp->m_sb.sb_blocksize; + if (extsize_fsb > mp->m_sb.sb_agblocks / 2) { + code = XFS_ERROR(EINVAL); + goto error_return; + } + } + + if (fa->fsx_extsize % size) { + code = XFS_ERROR(EINVAL); + goto error_return; + } + } + } + + + if (mask & FSX_XFLAGS) { + /* + * Can't change realtime flag if any extents are allocated. + */ + if ((ip->i_d.di_nextents || ip->i_delayed_blks) && + (XFS_IS_REALTIME_INODE(ip)) != + (fa->fsx_xflags & XFS_XFLAG_REALTIME)) { + code = XFS_ERROR(EINVAL); /* EFBIG? */ + goto error_return; + } + + /* + * If realtime flag is set then must have realtime data. + */ + if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) { + if ((mp->m_sb.sb_rblocks == 0) || + (mp->m_sb.sb_rextsize == 0) || + (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) { + code = XFS_ERROR(EINVAL); + goto error_return; + } + } + + /* + * Can't modify an immutable/append-only file unless + * we have appropriate permission. + */ + if ((ip->i_d.di_flags & + (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) || + (fa->fsx_xflags & + (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && + !capable(CAP_LINUX_IMMUTABLE)) { + code = XFS_ERROR(EPERM); + goto error_return; + } + } + + xfs_trans_ijoin(tp, ip); + + /* + * Change file ownership. Must be the owner or privileged. + */ + if (mask & FSX_PROJID) { + /* + * CAP_FSETID overrides the following restrictions: + * + * The set-user-ID and set-group-ID bits of a file will be + * cleared upon successful return from chown() + */ + if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && + !capable(CAP_FSETID)) + ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); + + /* + * Change the ownerships and register quota modifications + * in the transaction. + */ + if (xfs_get_projid(ip) != fa->fsx_projid) { + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { + olddquot = xfs_qm_vop_chown(tp, ip, + &ip->i_gdquot, gdqp); + } + xfs_set_projid(ip, fa->fsx_projid); + + /* + * We may have to rev the inode as well as + * the superblock version number since projids didn't + * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. + */ + if (ip->i_d.di_version == 1) + xfs_bump_ino_vers2(tp, ip); + } + + } + + if (mask & FSX_EXTSIZE) + ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; + if (mask & FSX_XFLAGS) { + xfs_set_diflags(ip, fa->fsx_xflags); + xfs_diflags_to_linux(ip); + } + + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + XFS_STATS_INC(xs_ig_attrchg); + + /* + * If this is a synchronous mount, make sure that the + * transaction goes to disk before returning to the user. + * This is slightly sub-optimal in that truncates require + * two sync transactions instead of one for wsync filesystems. + * One for the truncate and one for the timestamps since we + * don't want to change the timestamps unless we're sure the + * truncate worked. Truncates are less than 1% of the laddis + * mix so this probably isn't worth the trouble to optimize. + */ + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(tp); + code = xfs_trans_commit(tp, 0); + xfs_iunlock(ip, lock_flags); + + /* + * Release any dquot(s) the inode had kept before chown. + */ + xfs_qm_dqrele(olddquot); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); + + return code; + + error_return: + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); + xfs_trans_cancel(tp, 0); + if (lock_flags) + xfs_iunlock(ip, lock_flags); + return code; +} + +STATIC int +xfs_ioc_fssetxattr( + xfs_inode_t *ip, + struct file *filp, + void __user *arg) +{ + struct fsxattr fa; + unsigned int mask; + + if (copy_from_user(&fa, arg, sizeof(fa))) + return -EFAULT; + + mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID; + if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) + mask |= FSX_NONBLOCK; + + return -xfs_ioctl_setattr(ip, &fa, mask); +} + +STATIC int +xfs_ioc_getxflags( + xfs_inode_t *ip, + void __user *arg) +{ + unsigned int flags; + + flags = xfs_di2lxflags(ip->i_d.di_flags); + if (copy_to_user(arg, &flags, sizeof(flags))) + return -EFAULT; + return 0; +} + +STATIC int +xfs_ioc_setxflags( + xfs_inode_t *ip, + struct file *filp, + void __user *arg) +{ + struct fsxattr fa; + unsigned int flags; + unsigned int mask; + + if (copy_from_user(&flags, arg, sizeof(flags))) + return -EFAULT; + + if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ + FS_NOATIME_FL | FS_NODUMP_FL | \ + FS_SYNC_FL)) + return -EOPNOTSUPP; + + mask = FSX_XFLAGS; + if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) + mask |= FSX_NONBLOCK; + fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); + + return -xfs_ioctl_setattr(ip, &fa, mask); +} + +STATIC int +xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full) +{ + struct getbmap __user *base = *ap; + + /* copy only getbmap portion (not getbmapx) */ + if (copy_to_user(base, bmv, sizeof(struct getbmap))) + return XFS_ERROR(EFAULT); + + *ap += sizeof(struct getbmap); + return 0; +} + +STATIC int +xfs_ioc_getbmap( + struct xfs_inode *ip, + int ioflags, + unsigned int cmd, + void __user *arg) +{ + struct getbmapx bmx; + int error; + + if (copy_from_user(&bmx, arg, sizeof(struct getbmapx))) + return -XFS_ERROR(EFAULT); + + if (bmx.bmv_count < 2) + return -XFS_ERROR(EINVAL); + + bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); + if (ioflags & IO_INVIS) + bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ; + + error = xfs_getbmap(ip, &bmx, xfs_getbmap_format, + (struct getbmap *)arg+1); + if (error) + return -error; + + /* copy back header - only size of getbmap */ + if (copy_to_user(arg, &bmx, sizeof(struct getbmap))) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full) +{ + struct getbmapx __user *base = *ap; + + if (copy_to_user(base, bmv, sizeof(struct getbmapx))) + return XFS_ERROR(EFAULT); + + *ap += sizeof(struct getbmapx); + return 0; +} + +STATIC int +xfs_ioc_getbmapx( + struct xfs_inode *ip, + void __user *arg) +{ + struct getbmapx bmx; + int error; + + if (copy_from_user(&bmx, arg, sizeof(bmx))) + return -XFS_ERROR(EFAULT); + + if (bmx.bmv_count < 2) + return -XFS_ERROR(EINVAL); + + if (bmx.bmv_iflags & (~BMV_IF_VALID)) + return -XFS_ERROR(EINVAL); + + error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format, + (struct getbmapx *)arg+1); + if (error) + return -error; + + /* copy back header */ + if (copy_to_user(arg, &bmx, sizeof(struct getbmapx))) + return -XFS_ERROR(EFAULT); + + return 0; +} + +/* + * Note: some of the ioctl's return positive numbers as a + * byte count indicating success, such as readlink_by_handle. + * So we don't "sign flip" like most other routines. This means + * true errors need to be returned as a negative value. + */ +long +xfs_file_ioctl( + struct file *filp, + unsigned int cmd, + unsigned long p) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + void __user *arg = (void __user *)p; + int ioflags = 0; + int error; + + if (filp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; + + trace_xfs_file_ioctl(ip); + + switch (cmd) { + case FITRIM: + return xfs_ioc_trim(mp, arg); + case XFS_IOC_ALLOCSP: + case XFS_IOC_FREESP: + case XFS_IOC_RESVSP: + case XFS_IOC_UNRESVSP: + case XFS_IOC_ALLOCSP64: + case XFS_IOC_FREESP64: + case XFS_IOC_RESVSP64: + case XFS_IOC_UNRESVSP64: + case XFS_IOC_ZERO_RANGE: { + xfs_flock64_t bf; + + if (copy_from_user(&bf, arg, sizeof(bf))) + return -XFS_ERROR(EFAULT); + return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); + } + case XFS_IOC_DIOINFO: { + struct dioattr da; + xfs_buftarg_t *target = + XFS_IS_REALTIME_INODE(ip) ? + mp->m_rtdev_targp : mp->m_ddev_targp; + + da.d_mem = da.d_miniosz = 1 << target->bt_sshift; + da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); + + if (copy_to_user(arg, &da, sizeof(da))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_FSBULKSTAT_SINGLE: + case XFS_IOC_FSBULKSTAT: + case XFS_IOC_FSINUMBERS: + return xfs_ioc_bulkstat(mp, cmd, arg); + + case XFS_IOC_FSGEOMETRY_V1: + return xfs_ioc_fsgeometry_v1(mp, arg); + + case XFS_IOC_FSGEOMETRY: + return xfs_ioc_fsgeometry(mp, arg); + + case XFS_IOC_GETVERSION: + return put_user(inode->i_generation, (int __user *)arg); + + case XFS_IOC_FSGETXATTR: + return xfs_ioc_fsgetxattr(ip, 0, arg); + case XFS_IOC_FSGETXATTRA: + return xfs_ioc_fsgetxattr(ip, 1, arg); + case XFS_IOC_FSSETXATTR: + return xfs_ioc_fssetxattr(ip, filp, arg); + case XFS_IOC_GETXFLAGS: + return xfs_ioc_getxflags(ip, arg); + case XFS_IOC_SETXFLAGS: + return xfs_ioc_setxflags(ip, filp, arg); + + case XFS_IOC_FSSETDM: { + struct fsdmidata dmi; + + if (copy_from_user(&dmi, arg, sizeof(dmi))) + return -XFS_ERROR(EFAULT); + + error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, + dmi.fsd_dmstate); + return -error; + } + + case XFS_IOC_GETBMAP: + case XFS_IOC_GETBMAPA: + return xfs_ioc_getbmap(ip, ioflags, cmd, arg); + + case XFS_IOC_GETBMAPX: + return xfs_ioc_getbmapx(ip, arg); + + case XFS_IOC_FD_TO_HANDLE: + case XFS_IOC_PATH_TO_HANDLE: + case XFS_IOC_PATH_TO_FSHANDLE: { + xfs_fsop_handlereq_t hreq; + + if (copy_from_user(&hreq, arg, sizeof(hreq))) + return -XFS_ERROR(EFAULT); + return xfs_find_handle(cmd, &hreq); + } + case XFS_IOC_OPEN_BY_HANDLE: { + xfs_fsop_handlereq_t hreq; + + if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) + return -XFS_ERROR(EFAULT); + return xfs_open_by_handle(filp, &hreq); + } + case XFS_IOC_FSSETDM_BY_HANDLE: + return xfs_fssetdm_by_handle(filp, arg); + + case XFS_IOC_READLINK_BY_HANDLE: { + xfs_fsop_handlereq_t hreq; + + if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) + return -XFS_ERROR(EFAULT); + return xfs_readlink_by_handle(filp, &hreq); + } + case XFS_IOC_ATTRLIST_BY_HANDLE: + return xfs_attrlist_by_handle(filp, arg); + + case XFS_IOC_ATTRMULTI_BY_HANDLE: + return xfs_attrmulti_by_handle(filp, arg); + + case XFS_IOC_SWAPEXT: { + struct xfs_swapext sxp; + + if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t))) + return -XFS_ERROR(EFAULT); + error = xfs_swapext(&sxp); + return -error; + } + + case XFS_IOC_FSCOUNTS: { + xfs_fsop_counts_t out; + + error = xfs_fs_counts(mp, &out); + if (error) + return -error; + + if (copy_to_user(arg, &out, sizeof(out))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_SET_RESBLKS: { + xfs_fsop_resblks_t inout; + __uint64_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (mp->m_flags & XFS_MOUNT_RDONLY) + return -XFS_ERROR(EROFS); + + if (copy_from_user(&inout, arg, sizeof(inout))) + return -XFS_ERROR(EFAULT); + + /* input parameter is passed in resblks field of structure */ + in = inout.resblks; + error = xfs_reserve_blocks(mp, &in, &inout); + if (error) + return -error; + + if (copy_to_user(arg, &inout, sizeof(inout))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_GET_RESBLKS: { + xfs_fsop_resblks_t out; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + error = xfs_reserve_blocks(mp, NULL, &out); + if (error) + return -error; + + if (copy_to_user(arg, &out, sizeof(out))) + return -XFS_ERROR(EFAULT); + + return 0; + } + + case XFS_IOC_FSGROWFSDATA: { + xfs_growfs_data_t in; + + if (copy_from_user(&in, arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_growfs_data(mp, &in); + return -error; + } + + case XFS_IOC_FSGROWFSLOG: { + xfs_growfs_log_t in; + + if (copy_from_user(&in, arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_growfs_log(mp, &in); + return -error; + } + + case XFS_IOC_FSGROWFSRT: { + xfs_growfs_rt_t in; + + if (copy_from_user(&in, arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_growfs_rt(mp, &in); + return -error; + } + + case XFS_IOC_GOINGDOWN: { + __uint32_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (get_user(in, (__uint32_t __user *)arg)) + return -XFS_ERROR(EFAULT); + + error = xfs_fs_goingdown(mp, in); + return -error; + } + + case XFS_IOC_ERROR_INJECTION: { + xfs_error_injection_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&in, arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_errortag_add(in.errtag, mp); + return -error; + } + + case XFS_IOC_ERROR_CLEARALL: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + error = xfs_errortag_clearall(mp, 1); + return -error; + + default: + return -ENOTTY; + } +} diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h new file mode 100644 index 0000000..d56173b --- /dev/null +++ b/fs/xfs/xfs_ioctl.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_IOCTL_H__ +#define __XFS_IOCTL_H__ + +extern int +xfs_ioc_space( + struct xfs_inode *ip, + struct inode *inode, + struct file *filp, + int ioflags, + unsigned int cmd, + xfs_flock64_t *bf); + +extern int +xfs_find_handle( + unsigned int cmd, + xfs_fsop_handlereq_t *hreq); + +extern int +xfs_open_by_handle( + struct file *parfilp, + xfs_fsop_handlereq_t *hreq); + +extern int +xfs_readlink_by_handle( + struct file *parfilp, + xfs_fsop_handlereq_t *hreq); + +extern int +xfs_attrmulti_attr_get( + struct inode *inode, + unsigned char *name, + unsigned char __user *ubuf, + __uint32_t *len, + __uint32_t flags); + +extern int +xfs_attrmulti_attr_set( + struct inode *inode, + unsigned char *name, + const unsigned char __user *ubuf, + __uint32_t len, + __uint32_t flags); + +extern int +xfs_attrmulti_attr_remove( + struct inode *inode, + unsigned char *name, + __uint32_t flags); + +extern struct dentry * +xfs_handle_to_dentry( + struct file *parfilp, + void __user *uhandle, + u32 hlen); + +extern long +xfs_file_ioctl( + struct file *filp, + unsigned int cmd, + unsigned long p); + +extern long +xfs_file_compat_ioctl( + struct file *file, + unsigned int cmd, + unsigned long arg); + +#endif diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c new file mode 100644 index 0000000..54e623b --- /dev/null +++ b/fs/xfs/xfs_ioctl32.c @@ -0,0 +1,672 @@ +/* + * Copyright (c) 2004-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include +#include +#include +#include +#include +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_vnode.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_itable.h" +#include "xfs_error.h" +#include "xfs_dfrag.h" +#include "xfs_vnodeops.h" +#include "xfs_fsops.h" +#include "xfs_alloc.h" +#include "xfs_rtalloc.h" +#include "xfs_attr.h" +#include "xfs_ioctl.h" +#include "xfs_ioctl32.h" +#include "xfs_trace.h" + +#define _NATIVE_IOC(cmd, type) \ + _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) + +#ifdef BROKEN_X86_ALIGNMENT +STATIC int +xfs_compat_flock64_copyin( + xfs_flock64_t *bf, + compat_xfs_flock64_t __user *arg32) +{ + if (get_user(bf->l_type, &arg32->l_type) || + get_user(bf->l_whence, &arg32->l_whence) || + get_user(bf->l_start, &arg32->l_start) || + get_user(bf->l_len, &arg32->l_len) || + get_user(bf->l_sysid, &arg32->l_sysid) || + get_user(bf->l_pid, &arg32->l_pid) || + copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32))) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_compat_ioc_fsgeometry_v1( + struct xfs_mount *mp, + compat_xfs_fsop_geom_v1_t __user *arg32) +{ + xfs_fsop_geom_t fsgeo; + int error; + + error = xfs_fs_geometry(mp, &fsgeo, 3); + if (error) + return -error; + /* The 32-bit variant simply has some padding at the end */ + if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1))) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_compat_growfs_data_copyin( + struct xfs_growfs_data *in, + compat_xfs_growfs_data_t __user *arg32) +{ + if (get_user(in->newblocks, &arg32->newblocks) || + get_user(in->imaxpct, &arg32->imaxpct)) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_compat_growfs_rt_copyin( + struct xfs_growfs_rt *in, + compat_xfs_growfs_rt_t __user *arg32) +{ + if (get_user(in->newblocks, &arg32->newblocks) || + get_user(in->extsize, &arg32->extsize)) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_inumbers_fmt_compat( + void __user *ubuffer, + const xfs_inogrp_t *buffer, + long count, + long *written) +{ + compat_xfs_inogrp_t __user *p32 = ubuffer; + long i; + + for (i = 0; i < count; i++) { + if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) || + put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) || + put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask)) + return -XFS_ERROR(EFAULT); + } + *written = count * sizeof(*p32); + return 0; +} + +#else +#define xfs_inumbers_fmt_compat xfs_inumbers_fmt +#endif /* BROKEN_X86_ALIGNMENT */ + +STATIC int +xfs_ioctl32_bstime_copyin( + xfs_bstime_t *bstime, + compat_xfs_bstime_t __user *bstime32) +{ + compat_time_t sec32; /* tv_sec differs on 64 vs. 32 */ + + if (get_user(sec32, &bstime32->tv_sec) || + get_user(bstime->tv_nsec, &bstime32->tv_nsec)) + return -XFS_ERROR(EFAULT); + bstime->tv_sec = sec32; + return 0; +} + +/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */ +STATIC int +xfs_ioctl32_bstat_copyin( + xfs_bstat_t *bstat, + compat_xfs_bstat_t __user *bstat32) +{ + if (get_user(bstat->bs_ino, &bstat32->bs_ino) || + get_user(bstat->bs_mode, &bstat32->bs_mode) || + get_user(bstat->bs_nlink, &bstat32->bs_nlink) || + get_user(bstat->bs_uid, &bstat32->bs_uid) || + get_user(bstat->bs_gid, &bstat32->bs_gid) || + get_user(bstat->bs_rdev, &bstat32->bs_rdev) || + get_user(bstat->bs_blksize, &bstat32->bs_blksize) || + get_user(bstat->bs_size, &bstat32->bs_size) || + xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) || + xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) || + xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) || + get_user(bstat->bs_blocks, &bstat32->bs_size) || + get_user(bstat->bs_xflags, &bstat32->bs_size) || + get_user(bstat->bs_extsize, &bstat32->bs_extsize) || + get_user(bstat->bs_extents, &bstat32->bs_extents) || + get_user(bstat->bs_gen, &bstat32->bs_gen) || + get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) || + get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) || + get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || + get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || + get_user(bstat->bs_aextents, &bstat32->bs_aextents)) + return -XFS_ERROR(EFAULT); + return 0; +} + +/* XFS_IOC_FSBULKSTAT and friends */ + +STATIC int +xfs_bstime_store_compat( + compat_xfs_bstime_t __user *p32, + const xfs_bstime_t *p) +{ + __s32 sec32; + + sec32 = p->tv_sec; + if (put_user(sec32, &p32->tv_sec) || + put_user(p->tv_nsec, &p32->tv_nsec)) + return -XFS_ERROR(EFAULT); + return 0; +} + +/* Return 0 on success or positive error (to xfs_bulkstat()) */ +STATIC int +xfs_bulkstat_one_fmt_compat( + void __user *ubuffer, + int ubsize, + int *ubused, + const xfs_bstat_t *buffer) +{ + compat_xfs_bstat_t __user *p32 = ubuffer; + + if (ubsize < sizeof(*p32)) + return XFS_ERROR(ENOMEM); + + if (put_user(buffer->bs_ino, &p32->bs_ino) || + put_user(buffer->bs_mode, &p32->bs_mode) || + put_user(buffer->bs_nlink, &p32->bs_nlink) || + put_user(buffer->bs_uid, &p32->bs_uid) || + put_user(buffer->bs_gid, &p32->bs_gid) || + put_user(buffer->bs_rdev, &p32->bs_rdev) || + put_user(buffer->bs_blksize, &p32->bs_blksize) || + put_user(buffer->bs_size, &p32->bs_size) || + xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) || + xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) || + xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) || + put_user(buffer->bs_blocks, &p32->bs_blocks) || + put_user(buffer->bs_xflags, &p32->bs_xflags) || + put_user(buffer->bs_extsize, &p32->bs_extsize) || + put_user(buffer->bs_extents, &p32->bs_extents) || + put_user(buffer->bs_gen, &p32->bs_gen) || + put_user(buffer->bs_projid, &p32->bs_projid) || + put_user(buffer->bs_projid_hi, &p32->bs_projid_hi) || + put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || + put_user(buffer->bs_dmstate, &p32->bs_dmstate) || + put_user(buffer->bs_aextents, &p32->bs_aextents)) + return XFS_ERROR(EFAULT); + if (ubused) + *ubused = sizeof(*p32); + return 0; +} + +STATIC int +xfs_bulkstat_one_compat( + xfs_mount_t *mp, /* mount point for filesystem */ + xfs_ino_t ino, /* inode number to get data for */ + void __user *buffer, /* buffer to place output in */ + int ubsize, /* size of buffer */ + int *ubused, /* bytes used by me */ + int *stat) /* BULKSTAT_RV_... */ +{ + return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, + xfs_bulkstat_one_fmt_compat, + ubused, stat); +} + +/* copied from xfs_ioctl.c */ +STATIC int +xfs_compat_ioc_bulkstat( + xfs_mount_t *mp, + unsigned int cmd, + compat_xfs_fsop_bulkreq_t __user *p32) +{ + u32 addr; + xfs_fsop_bulkreq_t bulkreq; + int count; /* # of records returned */ + xfs_ino_t inlast; /* last inode number */ + int done; + int error; + + /* done = 1 if there are more stats to get and if bulkstat */ + /* should be called again (unused here, but used in dmapi) */ + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + + if (XFS_FORCED_SHUTDOWN(mp)) + return -XFS_ERROR(EIO); + + if (get_user(addr, &p32->lastip)) + return -XFS_ERROR(EFAULT); + bulkreq.lastip = compat_ptr(addr); + if (get_user(bulkreq.icount, &p32->icount) || + get_user(addr, &p32->ubuffer)) + return -XFS_ERROR(EFAULT); + bulkreq.ubuffer = compat_ptr(addr); + if (get_user(addr, &p32->ocount)) + return -XFS_ERROR(EFAULT); + bulkreq.ocount = compat_ptr(addr); + + if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) + return -XFS_ERROR(EFAULT); + + if ((count = bulkreq.icount) <= 0) + return -XFS_ERROR(EINVAL); + + if (bulkreq.ubuffer == NULL) + return -XFS_ERROR(EINVAL); + + if (cmd == XFS_IOC_FSINUMBERS_32) { + error = xfs_inumbers(mp, &inlast, &count, + bulkreq.ubuffer, xfs_inumbers_fmt_compat); + } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { + int res; + + error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, + sizeof(compat_xfs_bstat_t), 0, &res); + } else if (cmd == XFS_IOC_FSBULKSTAT_32) { + error = xfs_bulkstat(mp, &inlast, &count, + xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t), + bulkreq.ubuffer, &done); + } else + error = XFS_ERROR(EINVAL); + if (error) + return -error; + + if (bulkreq.ocount != NULL) { + if (copy_to_user(bulkreq.lastip, &inlast, + sizeof(xfs_ino_t))) + return -XFS_ERROR(EFAULT); + + if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) + return -XFS_ERROR(EFAULT); + } + + return 0; +} + +STATIC int +xfs_compat_handlereq_copyin( + xfs_fsop_handlereq_t *hreq, + compat_xfs_fsop_handlereq_t __user *arg32) +{ + compat_xfs_fsop_handlereq_t hreq32; + + if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t))) + return -XFS_ERROR(EFAULT); + + hreq->fd = hreq32.fd; + hreq->path = compat_ptr(hreq32.path); + hreq->oflags = hreq32.oflags; + hreq->ihandle = compat_ptr(hreq32.ihandle); + hreq->ihandlen = hreq32.ihandlen; + hreq->ohandle = compat_ptr(hreq32.ohandle); + hreq->ohandlen = compat_ptr(hreq32.ohandlen); + + return 0; +} + +STATIC struct dentry * +xfs_compat_handlereq_to_dentry( + struct file *parfilp, + compat_xfs_fsop_handlereq_t *hreq) +{ + return xfs_handle_to_dentry(parfilp, + compat_ptr(hreq->ihandle), hreq->ihandlen); +} + +STATIC int +xfs_compat_attrlist_by_handle( + struct file *parfilp, + void __user *arg) +{ + int error; + attrlist_cursor_kern_t *cursor; + compat_xfs_fsop_attrlist_handlereq_t al_hreq; + struct dentry *dentry; + char *kbuf; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&al_hreq, arg, + sizeof(compat_xfs_fsop_attrlist_handlereq_t))) + return -XFS_ERROR(EFAULT); + if (al_hreq.buflen > XATTR_LIST_MAX) + return -XFS_ERROR(EINVAL); + + /* + * Reject flags, only allow namespaces. + */ + if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) + return -XFS_ERROR(EINVAL); + + dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + error = -ENOMEM; + kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL); + if (!kbuf) + goto out_dput; + + cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; + error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, + al_hreq.flags, cursor); + if (error) + goto out_kfree; + + if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen)) + error = -EFAULT; + + out_kfree: + kfree(kbuf); + out_dput: + dput(dentry); + return error; +} + +STATIC int +xfs_compat_attrmulti_by_handle( + struct file *parfilp, + void __user *arg) +{ + int error; + compat_xfs_attr_multiop_t *ops; + compat_xfs_fsop_attrmulti_handlereq_t am_hreq; + struct dentry *dentry; + unsigned int i, size; + unsigned char *attr_name; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&am_hreq, arg, + sizeof(compat_xfs_fsop_attrmulti_handlereq_t))) + return -XFS_ERROR(EFAULT); + + /* overflow check */ + if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t)) + return -E2BIG; + + dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + error = E2BIG; + size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t); + if (!size || size > 16 * PAGE_SIZE) + goto out_dput; + + ops = memdup_user(compat_ptr(am_hreq.ops), size); + if (IS_ERR(ops)) { + error = PTR_ERR(ops); + goto out_dput; + } + + attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); + if (!attr_name) + goto out_kfree_ops; + + error = 0; + for (i = 0; i < am_hreq.opcount; i++) { + ops[i].am_error = strncpy_from_user((char *)attr_name, + compat_ptr(ops[i].am_attrname), + MAXNAMELEN); + if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) + error = -ERANGE; + if (ops[i].am_error < 0) + break; + + switch (ops[i].am_opcode) { + case ATTR_OP_GET: + ops[i].am_error = xfs_attrmulti_attr_get( + dentry->d_inode, attr_name, + compat_ptr(ops[i].am_attrvalue), + &ops[i].am_length, ops[i].am_flags); + break; + case ATTR_OP_SET: + ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); + if (ops[i].am_error) + break; + ops[i].am_error = xfs_attrmulti_attr_set( + dentry->d_inode, attr_name, + compat_ptr(ops[i].am_attrvalue), + ops[i].am_length, ops[i].am_flags); + mnt_drop_write(parfilp->f_path.mnt); + break; + case ATTR_OP_REMOVE: + ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); + if (ops[i].am_error) + break; + ops[i].am_error = xfs_attrmulti_attr_remove( + dentry->d_inode, attr_name, + ops[i].am_flags); + mnt_drop_write(parfilp->f_path.mnt); + break; + default: + ops[i].am_error = EINVAL; + } + } + + if (copy_to_user(compat_ptr(am_hreq.ops), ops, size)) + error = XFS_ERROR(EFAULT); + + kfree(attr_name); + out_kfree_ops: + kfree(ops); + out_dput: + dput(dentry); + return -error; +} + +STATIC int +xfs_compat_fssetdm_by_handle( + struct file *parfilp, + void __user *arg) +{ + int error; + struct fsdmidata fsd; + compat_xfs_fsop_setdm_handlereq_t dmhreq; + struct dentry *dentry; + + if (!capable(CAP_MKNOD)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&dmhreq, arg, + sizeof(compat_xfs_fsop_setdm_handlereq_t))) + return -XFS_ERROR(EFAULT); + + dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { + error = -XFS_ERROR(EPERM); + goto out; + } + + if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) { + error = -XFS_ERROR(EFAULT); + goto out; + } + + error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, + fsd.fsd_dmstate); + +out: + dput(dentry); + return error; +} + +long +xfs_file_compat_ioctl( + struct file *filp, + unsigned cmd, + unsigned long p) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + void __user *arg = (void __user *)p; + int ioflags = 0; + int error; + + if (filp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; + + trace_xfs_file_compat_ioctl(ip); + + switch (cmd) { + /* No size or alignment issues on any arch */ + case XFS_IOC_DIOINFO: + case XFS_IOC_FSGEOMETRY: + case XFS_IOC_FSGETXATTR: + case XFS_IOC_FSSETXATTR: + case XFS_IOC_FSGETXATTRA: + case XFS_IOC_FSSETDM: + case XFS_IOC_GETBMAP: + case XFS_IOC_GETBMAPA: + case XFS_IOC_GETBMAPX: + case XFS_IOC_FSCOUNTS: + case XFS_IOC_SET_RESBLKS: + case XFS_IOC_GET_RESBLKS: + case XFS_IOC_FSGROWFSLOG: + case XFS_IOC_GOINGDOWN: + case XFS_IOC_ERROR_INJECTION: + case XFS_IOC_ERROR_CLEARALL: + return xfs_file_ioctl(filp, cmd, p); +#ifndef BROKEN_X86_ALIGNMENT + /* These are handled fine if no alignment issues */ + case XFS_IOC_ALLOCSP: + case XFS_IOC_FREESP: + case XFS_IOC_RESVSP: + case XFS_IOC_UNRESVSP: + case XFS_IOC_ALLOCSP64: + case XFS_IOC_FREESP64: + case XFS_IOC_RESVSP64: + case XFS_IOC_UNRESVSP64: + case XFS_IOC_FSGEOMETRY_V1: + case XFS_IOC_FSGROWFSDATA: + case XFS_IOC_FSGROWFSRT: + case XFS_IOC_ZERO_RANGE: + return xfs_file_ioctl(filp, cmd, p); +#else + case XFS_IOC_ALLOCSP_32: + case XFS_IOC_FREESP_32: + case XFS_IOC_ALLOCSP64_32: + case XFS_IOC_FREESP64_32: + case XFS_IOC_RESVSP_32: + case XFS_IOC_UNRESVSP_32: + case XFS_IOC_RESVSP64_32: + case XFS_IOC_UNRESVSP64_32: + case XFS_IOC_ZERO_RANGE_32: { + struct xfs_flock64 bf; + + if (xfs_compat_flock64_copyin(&bf, arg)) + return -XFS_ERROR(EFAULT); + cmd = _NATIVE_IOC(cmd, struct xfs_flock64); + return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); + } + case XFS_IOC_FSGEOMETRY_V1_32: + return xfs_compat_ioc_fsgeometry_v1(mp, arg); + case XFS_IOC_FSGROWFSDATA_32: { + struct xfs_growfs_data in; + + if (xfs_compat_growfs_data_copyin(&in, arg)) + return -XFS_ERROR(EFAULT); + error = xfs_growfs_data(mp, &in); + return -error; + } + case XFS_IOC_FSGROWFSRT_32: { + struct xfs_growfs_rt in; + + if (xfs_compat_growfs_rt_copyin(&in, arg)) + return -XFS_ERROR(EFAULT); + error = xfs_growfs_rt(mp, &in); + return -error; + } +#endif + /* long changes size, but xfs only copiese out 32 bits */ + case XFS_IOC_GETXFLAGS_32: + case XFS_IOC_SETXFLAGS_32: + case XFS_IOC_GETVERSION_32: + cmd = _NATIVE_IOC(cmd, long); + return xfs_file_ioctl(filp, cmd, p); + case XFS_IOC_SWAPEXT_32: { + struct xfs_swapext sxp; + struct compat_xfs_swapext __user *sxu = arg; + + /* Bulk copy in up to the sx_stat field, then copy bstat */ + if (copy_from_user(&sxp, sxu, + offsetof(struct xfs_swapext, sx_stat)) || + xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) + return -XFS_ERROR(EFAULT); + error = xfs_swapext(&sxp); + return -error; + } + case XFS_IOC_FSBULKSTAT_32: + case XFS_IOC_FSBULKSTAT_SINGLE_32: + case XFS_IOC_FSINUMBERS_32: + return xfs_compat_ioc_bulkstat(mp, cmd, arg); + case XFS_IOC_FD_TO_HANDLE_32: + case XFS_IOC_PATH_TO_HANDLE_32: + case XFS_IOC_PATH_TO_FSHANDLE_32: { + struct xfs_fsop_handlereq hreq; + + if (xfs_compat_handlereq_copyin(&hreq, arg)) + return -XFS_ERROR(EFAULT); + cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq); + return xfs_find_handle(cmd, &hreq); + } + case XFS_IOC_OPEN_BY_HANDLE_32: { + struct xfs_fsop_handlereq hreq; + + if (xfs_compat_handlereq_copyin(&hreq, arg)) + return -XFS_ERROR(EFAULT); + return xfs_open_by_handle(filp, &hreq); + } + case XFS_IOC_READLINK_BY_HANDLE_32: { + struct xfs_fsop_handlereq hreq; + + if (xfs_compat_handlereq_copyin(&hreq, arg)) + return -XFS_ERROR(EFAULT); + return xfs_readlink_by_handle(filp, &hreq); + } + case XFS_IOC_ATTRLIST_BY_HANDLE_32: + return xfs_compat_attrlist_by_handle(filp, arg); + case XFS_IOC_ATTRMULTI_BY_HANDLE_32: + return xfs_compat_attrmulti_by_handle(filp, arg); + case XFS_IOC_FSSETDM_BY_HANDLE_32: + return xfs_compat_fssetdm_by_handle(filp, arg); + default: + return -XFS_ERROR(ENOIOCTLCMD); + } +} diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h new file mode 100644 index 0000000..80f4060 --- /dev/null +++ b/fs/xfs/xfs_ioctl32.h @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2004-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_IOCTL32_H__ +#define __XFS_IOCTL32_H__ + +#include + +/* + * on 32-bit arches, ioctl argument structures may have different sizes + * and/or alignment. We define compat structures which match the + * 32-bit sizes/alignments here, and their associated ioctl numbers. + * + * xfs_ioctl32.c contains routines to copy these structures in and out. + */ + +/* stock kernel-level ioctls we support */ +#define XFS_IOC_GETXFLAGS_32 FS_IOC32_GETFLAGS +#define XFS_IOC_SETXFLAGS_32 FS_IOC32_SETFLAGS +#define XFS_IOC_GETVERSION_32 FS_IOC32_GETVERSION + +/* + * On intel, even if sizes match, alignment and/or padding may differ. + */ +#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) +#define BROKEN_X86_ALIGNMENT +#define __compat_packed __attribute__((packed)) +#else +#define __compat_packed +#endif + +typedef struct compat_xfs_bstime { + compat_time_t tv_sec; /* seconds */ + __s32 tv_nsec; /* and nanoseconds */ +} compat_xfs_bstime_t; + +typedef struct compat_xfs_bstat { + __u64 bs_ino; /* inode number */ + __u16 bs_mode; /* type and mode */ + __u16 bs_nlink; /* number of links */ + __u32 bs_uid; /* user id */ + __u32 bs_gid; /* group id */ + __u32 bs_rdev; /* device value */ + __s32 bs_blksize; /* block size */ + __s64 bs_size; /* file size */ + compat_xfs_bstime_t bs_atime; /* access time */ + compat_xfs_bstime_t bs_mtime; /* modify time */ + compat_xfs_bstime_t bs_ctime; /* inode change time */ + int64_t bs_blocks; /* number of blocks */ + __u32 bs_xflags; /* extended flags */ + __s32 bs_extsize; /* extent size */ + __s32 bs_extents; /* number of extents */ + __u32 bs_gen; /* generation count */ + __u16 bs_projid_lo; /* lower part of project id */ +#define bs_projid bs_projid_lo /* (previously just bs_projid) */ + __u16 bs_projid_hi; /* high part of project id */ + unsigned char bs_pad[12]; /* pad space, unused */ + __u32 bs_dmevmask; /* DMIG event mask */ + __u16 bs_dmstate; /* DMIG state info */ + __u16 bs_aextents; /* attribute number of extents */ +} __compat_packed compat_xfs_bstat_t; + +typedef struct compat_xfs_fsop_bulkreq { + compat_uptr_t lastip; /* last inode # pointer */ + __s32 icount; /* count of entries in buffer */ + compat_uptr_t ubuffer; /* user buffer for inode desc. */ + compat_uptr_t ocount; /* output count pointer */ +} compat_xfs_fsop_bulkreq_t; + +#define XFS_IOC_FSBULKSTAT_32 \ + _IOWR('X', 101, struct compat_xfs_fsop_bulkreq) +#define XFS_IOC_FSBULKSTAT_SINGLE_32 \ + _IOWR('X', 102, struct compat_xfs_fsop_bulkreq) +#define XFS_IOC_FSINUMBERS_32 \ + _IOWR('X', 103, struct compat_xfs_fsop_bulkreq) + +typedef struct compat_xfs_fsop_handlereq { + __u32 fd; /* fd for FD_TO_HANDLE */ + compat_uptr_t path; /* user pathname */ + __u32 oflags; /* open flags */ + compat_uptr_t ihandle; /* user supplied handle */ + __u32 ihandlen; /* user supplied length */ + compat_uptr_t ohandle; /* user buffer for handle */ + compat_uptr_t ohandlen; /* user buffer length */ +} compat_xfs_fsop_handlereq_t; + +#define XFS_IOC_PATH_TO_FSHANDLE_32 \ + _IOWR('X', 104, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_PATH_TO_HANDLE_32 \ + _IOWR('X', 105, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_FD_TO_HANDLE_32 \ + _IOWR('X', 106, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_OPEN_BY_HANDLE_32 \ + _IOWR('X', 107, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_READLINK_BY_HANDLE_32 \ + _IOWR('X', 108, struct compat_xfs_fsop_handlereq) + +/* The bstat field in the swapext struct needs translation */ +typedef struct compat_xfs_swapext { + __int64_t sx_version; /* version */ + __int64_t sx_fdtarget; /* fd of target file */ + __int64_t sx_fdtmp; /* fd of tmp file */ + xfs_off_t sx_offset; /* offset into file */ + xfs_off_t sx_length; /* leng from offset */ + char sx_pad[16]; /* pad space, unused */ + compat_xfs_bstat_t sx_stat; /* stat of target b4 copy */ +} __compat_packed compat_xfs_swapext_t; + +#define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext) + +typedef struct compat_xfs_fsop_attrlist_handlereq { + struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */ + struct xfs_attrlist_cursor pos; /* opaque cookie, list offset */ + __u32 flags; /* which namespace to use */ + __u32 buflen; /* length of buffer supplied */ + compat_uptr_t buffer; /* returned names */ +} __compat_packed compat_xfs_fsop_attrlist_handlereq_t; + +/* Note: actually this is read/write */ +#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \ + _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq) + +/* am_opcodes defined in xfs_fs.h */ +typedef struct compat_xfs_attr_multiop { + __u32 am_opcode; + __s32 am_error; + compat_uptr_t am_attrname; + compat_uptr_t am_attrvalue; + __u32 am_length; + __u32 am_flags; +} compat_xfs_attr_multiop_t; + +typedef struct compat_xfs_fsop_attrmulti_handlereq { + struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */ + __u32 opcount;/* count of following multiop */ + /* ptr to compat_xfs_attr_multiop */ + compat_uptr_t ops; /* attr_multi data */ +} compat_xfs_fsop_attrmulti_handlereq_t; + +#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \ + _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq) + +typedef struct compat_xfs_fsop_setdm_handlereq { + struct compat_xfs_fsop_handlereq hreq; /* handle information */ + /* ptr to struct fsdmidata */ + compat_uptr_t data; /* DMAPI data */ +} compat_xfs_fsop_setdm_handlereq_t; + +#define XFS_IOC_FSSETDM_BY_HANDLE_32 \ + _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq) + +#ifdef BROKEN_X86_ALIGNMENT +/* on ia32 l_start is on a 32-bit boundary */ +typedef struct compat_xfs_flock64 { + __s16 l_type; + __s16 l_whence; + __s64 l_start __attribute__((packed)); + /* len == 0 means until end of file */ + __s64 l_len __attribute__((packed)); + __s32 l_sysid; + __u32 l_pid; + __s32 l_pad[4]; /* reserve area */ +} compat_xfs_flock64_t; + +#define XFS_IOC_ALLOCSP_32 _IOW('X', 10, struct compat_xfs_flock64) +#define XFS_IOC_FREESP_32 _IOW('X', 11, struct compat_xfs_flock64) +#define XFS_IOC_ALLOCSP64_32 _IOW('X', 36, struct compat_xfs_flock64) +#define XFS_IOC_FREESP64_32 _IOW('X', 37, struct compat_xfs_flock64) +#define XFS_IOC_RESVSP_32 _IOW('X', 40, struct compat_xfs_flock64) +#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) +#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) +#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) +#define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64) + +typedef struct compat_xfs_fsop_geom_v1 { + __u32 blocksize; /* filesystem (data) block size */ + __u32 rtextsize; /* realtime extent size */ + __u32 agblocks; /* fsblocks in an AG */ + __u32 agcount; /* number of allocation groups */ + __u32 logblocks; /* fsblocks in the log */ + __u32 sectsize; /* (data) sector size, bytes */ + __u32 inodesize; /* inode size in bytes */ + __u32 imaxpct; /* max allowed inode space(%) */ + __u64 datablocks; /* fsblocks in data subvolume */ + __u64 rtblocks; /* fsblocks in realtime subvol */ + __u64 rtextents; /* rt extents in realtime subvol*/ + __u64 logstart; /* starting fsblock of the log */ + unsigned char uuid[16]; /* unique id of the filesystem */ + __u32 sunit; /* stripe unit, fsblocks */ + __u32 swidth; /* stripe width, fsblocks */ + __s32 version; /* structure version */ + __u32 flags; /* superblock version flags */ + __u32 logsectsize; /* log sector size, bytes */ + __u32 rtsectsize; /* realtime sector size, bytes */ + __u32 dirblocksize; /* directory block size, bytes */ +} __attribute__((packed)) compat_xfs_fsop_geom_v1_t; + +#define XFS_IOC_FSGEOMETRY_V1_32 \ + _IOR('X', 100, struct compat_xfs_fsop_geom_v1) + +typedef struct compat_xfs_inogrp { + __u64 xi_startino; /* starting inode number */ + __s32 xi_alloccount; /* # bits set in allocmask */ + __u64 xi_allocmask; /* mask of allocated inodes */ +} __attribute__((packed)) compat_xfs_inogrp_t; + +/* These growfs input structures have padding on the end, so must translate */ +typedef struct compat_xfs_growfs_data { + __u64 newblocks; /* new data subvol size, fsblocks */ + __u32 imaxpct; /* new inode space percentage limit */ +} __attribute__((packed)) compat_xfs_growfs_data_t; + +typedef struct compat_xfs_growfs_rt { + __u64 newblocks; /* new realtime size, fsblocks */ + __u32 extsize; /* new realtime extent size, fsblocks */ +} __attribute__((packed)) compat_xfs_growfs_rt_t; + +#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data) +#define XFS_IOC_FSGROWFSRT_32 _IOW('X', 112, struct compat_xfs_growfs_rt) + +#endif /* BROKEN_X86_ALIGNMENT */ + +#endif /* __XFS_IOCTL32_H__ */ diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c new file mode 100644 index 0000000..b9c172b --- /dev/null +++ b/fs/xfs/xfs_iops.c @@ -0,0 +1,1210 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_acl.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_rw.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_utils.h" +#include "xfs_vnodeops.h" +#include "xfs_inode_item.h" +#include "xfs_trace.h" + +#include +#include +#include +#include +#include +#include +#include + +/* + * Bring the timestamps in the XFS inode uptodate. + * + * Used before writing the inode to disk. + */ +void +xfs_synchronize_times( + xfs_inode_t *ip) +{ + struct inode *inode = VFS_I(ip); + + ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; + ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; + ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec; + ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec; + ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec; + ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec; +} + +/* + * If the linux inode is valid, mark it dirty. + * Used when committing a dirty inode into a transaction so that + * the inode will get written back by the linux code + */ +void +xfs_mark_inode_dirty_sync( + xfs_inode_t *ip) +{ + struct inode *inode = VFS_I(ip); + + if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) + mark_inode_dirty_sync(inode); +} + +void +xfs_mark_inode_dirty( + xfs_inode_t *ip) +{ + struct inode *inode = VFS_I(ip); + + if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) + mark_inode_dirty(inode); +} + +/* + * Hook in SELinux. This is not quite correct yet, what we really need + * here (as we do for default ACLs) is a mechanism by which creation of + * these attrs can be journalled at inode creation time (along with the + * inode, of course, such that log replay can't cause these to be lost). + */ +STATIC int +xfs_init_security( + struct inode *inode, + struct inode *dir, + const struct qstr *qstr) +{ + struct xfs_inode *ip = XFS_I(inode); + size_t length; + void *value; + unsigned char *name; + int error; + + error = security_inode_init_security(inode, dir, qstr, (char **)&name, + &value, &length); + if (error) { + if (error == -EOPNOTSUPP) + return 0; + return -error; + } + + error = xfs_attr_set(ip, name, value, length, ATTR_SECURE); + + kfree(name); + kfree(value); + return error; +} + +static void +xfs_dentry_to_name( + struct xfs_name *namep, + struct dentry *dentry) +{ + namep->name = dentry->d_name.name; + namep->len = dentry->d_name.len; +} + +STATIC void +xfs_cleanup_inode( + struct inode *dir, + struct inode *inode, + struct dentry *dentry) +{ + struct xfs_name teardown; + + /* Oh, the horror. + * If we can't add the ACL or we fail in + * xfs_init_security we must back out. + * ENOSPC can hit here, among other things. + */ + xfs_dentry_to_name(&teardown, dentry); + + xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); + iput(inode); +} + +STATIC int +xfs_vn_mknod( + struct inode *dir, + struct dentry *dentry, + int mode, + dev_t rdev) +{ + struct inode *inode; + struct xfs_inode *ip = NULL; + struct posix_acl *default_acl = NULL; + struct xfs_name name; + int error; + + /* + * Irix uses Missed'em'V split, but doesn't want to see + * the upper 5 bits of (14bit) major. + */ + if (S_ISCHR(mode) || S_ISBLK(mode)) { + if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) + return -EINVAL; + rdev = sysv_encode_dev(rdev); + } else { + rdev = 0; + } + + if (IS_POSIXACL(dir)) { + default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(default_acl)) + return PTR_ERR(default_acl); + + if (!default_acl) + mode &= ~current_umask(); + } + + xfs_dentry_to_name(&name, dentry); + error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); + if (unlikely(error)) + goto out_free_acl; + + inode = VFS_I(ip); + + error = xfs_init_security(inode, dir, &dentry->d_name); + if (unlikely(error)) + goto out_cleanup_inode; + + if (default_acl) { + error = -xfs_inherit_acl(inode, default_acl); + default_acl = NULL; + if (unlikely(error)) + goto out_cleanup_inode; + } + + + d_instantiate(dentry, inode); + return -error; + + out_cleanup_inode: + xfs_cleanup_inode(dir, inode, dentry); + out_free_acl: + posix_acl_release(default_acl); + return -error; +} + +STATIC int +xfs_vn_create( + struct inode *dir, + struct dentry *dentry, + int mode, + struct nameidata *nd) +{ + return xfs_vn_mknod(dir, dentry, mode, 0); +} + +STATIC int +xfs_vn_mkdir( + struct inode *dir, + struct dentry *dentry, + int mode) +{ + return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0); +} + +STATIC struct dentry * +xfs_vn_lookup( + struct inode *dir, + struct dentry *dentry, + struct nameidata *nd) +{ + struct xfs_inode *cip; + struct xfs_name name; + int error; + + if (dentry->d_name.len >= MAXNAMELEN) + return ERR_PTR(-ENAMETOOLONG); + + xfs_dentry_to_name(&name, dentry); + error = xfs_lookup(XFS_I(dir), &name, &cip, NULL); + if (unlikely(error)) { + if (unlikely(error != ENOENT)) + return ERR_PTR(-error); + d_add(dentry, NULL); + return NULL; + } + + return d_splice_alias(VFS_I(cip), dentry); +} + +STATIC struct dentry * +xfs_vn_ci_lookup( + struct inode *dir, + struct dentry *dentry, + struct nameidata *nd) +{ + struct xfs_inode *ip; + struct xfs_name xname; + struct xfs_name ci_name; + struct qstr dname; + int error; + + if (dentry->d_name.len >= MAXNAMELEN) + return ERR_PTR(-ENAMETOOLONG); + + xfs_dentry_to_name(&xname, dentry); + error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name); + if (unlikely(error)) { + if (unlikely(error != ENOENT)) + return ERR_PTR(-error); + /* + * call d_add(dentry, NULL) here when d_drop_negative_children + * is called in xfs_vn_mknod (ie. allow negative dentries + * with CI filesystems). + */ + return NULL; + } + + /* if exact match, just splice and exit */ + if (!ci_name.name) + return d_splice_alias(VFS_I(ip), dentry); + + /* else case-insensitive match... */ + dname.name = ci_name.name; + dname.len = ci_name.len; + dentry = d_add_ci(dentry, VFS_I(ip), &dname); + kmem_free(ci_name.name); + return dentry; +} + +STATIC int +xfs_vn_link( + struct dentry *old_dentry, + struct inode *dir, + struct dentry *dentry) +{ + struct inode *inode = old_dentry->d_inode; + struct xfs_name name; + int error; + + xfs_dentry_to_name(&name, dentry); + + error = xfs_link(XFS_I(dir), XFS_I(inode), &name); + if (unlikely(error)) + return -error; + + ihold(inode); + d_instantiate(dentry, inode); + return 0; +} + +STATIC int +xfs_vn_unlink( + struct inode *dir, + struct dentry *dentry) +{ + struct xfs_name name; + int error; + + xfs_dentry_to_name(&name, dentry); + + error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode)); + if (error) + return error; + + /* + * With unlink, the VFS makes the dentry "negative": no inode, + * but still hashed. This is incompatible with case-insensitive + * mode, so invalidate (unhash) the dentry in CI-mode. + */ + if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb)) + d_invalidate(dentry); + return 0; +} + +STATIC int +xfs_vn_symlink( + struct inode *dir, + struct dentry *dentry, + const char *symname) +{ + struct inode *inode; + struct xfs_inode *cip = NULL; + struct xfs_name name; + int error; + mode_t mode; + + mode = S_IFLNK | + (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); + xfs_dentry_to_name(&name, dentry); + + error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip); + if (unlikely(error)) + goto out; + + inode = VFS_I(cip); + + error = xfs_init_security(inode, dir, &dentry->d_name); + if (unlikely(error)) + goto out_cleanup_inode; + + d_instantiate(dentry, inode); + return 0; + + out_cleanup_inode: + xfs_cleanup_inode(dir, inode, dentry); + out: + return -error; +} + +STATIC int +xfs_vn_rename( + struct inode *odir, + struct dentry *odentry, + struct inode *ndir, + struct dentry *ndentry) +{ + struct inode *new_inode = ndentry->d_inode; + struct xfs_name oname; + struct xfs_name nname; + + xfs_dentry_to_name(&oname, odentry); + xfs_dentry_to_name(&nname, ndentry); + + return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), + XFS_I(ndir), &nname, new_inode ? + XFS_I(new_inode) : NULL); +} + +/* + * careful here - this function can get called recursively, so + * we need to be very careful about how much stack we use. + * uio is kmalloced for this reason... + */ +STATIC void * +xfs_vn_follow_link( + struct dentry *dentry, + struct nameidata *nd) +{ + char *link; + int error = -ENOMEM; + + link = kmalloc(MAXPATHLEN+1, GFP_KERNEL); + if (!link) + goto out_err; + + error = -xfs_readlink(XFS_I(dentry->d_inode), link); + if (unlikely(error)) + goto out_kfree; + + nd_set_link(nd, link); + return NULL; + + out_kfree: + kfree(link); + out_err: + nd_set_link(nd, ERR_PTR(error)); + return NULL; +} + +STATIC void +xfs_vn_put_link( + struct dentry *dentry, + struct nameidata *nd, + void *p) +{ + char *s = nd_get_link(nd); + + if (!IS_ERR(s)) + kfree(s); +} + +STATIC int +xfs_vn_getattr( + struct vfsmount *mnt, + struct dentry *dentry, + struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + + trace_xfs_getattr(ip); + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + stat->size = XFS_ISIZE(ip); + stat->dev = inode->i_sb->s_dev; + stat->mode = ip->i_d.di_mode; + stat->nlink = ip->i_d.di_nlink; + stat->uid = ip->i_d.di_uid; + stat->gid = ip->i_d.di_gid; + stat->ino = ip->i_ino; + stat->atime = inode->i_atime; + stat->mtime = inode->i_mtime; + stat->ctime = inode->i_ctime; + stat->blocks = + XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); + + + switch (inode->i_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + stat->blksize = BLKDEV_IOSIZE; + stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, + sysv_minor(ip->i_df.if_u2.if_rdev)); + break; + default: + if (XFS_IS_REALTIME_INODE(ip)) { + /* + * If the file blocks are being allocated from a + * realtime volume, then return the inode's realtime + * extent size or the realtime volume's extent size. + */ + stat->blksize = + xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; + } else + stat->blksize = xfs_preferred_iosize(mp); + stat->rdev = 0; + break; + } + + return 0; +} + +int +xfs_setattr_nonsize( + struct xfs_inode *ip, + struct iattr *iattr, + int flags) +{ + xfs_mount_t *mp = ip->i_mount; + struct inode *inode = VFS_I(ip); + int mask = iattr->ia_valid; + xfs_trans_t *tp; + int error; + uid_t uid = 0, iuid = 0; + gid_t gid = 0, igid = 0; + struct xfs_dquot *udqp = NULL, *gdqp = NULL; + struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL; + + trace_xfs_setattr(ip); + + if (mp->m_flags & XFS_MOUNT_RDONLY) + return XFS_ERROR(EROFS); + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + error = -inode_change_ok(inode, iattr); + if (error) + return XFS_ERROR(error); + + ASSERT((mask & ATTR_SIZE) == 0); + + /* + * If disk quotas is on, we make sure that the dquots do exist on disk, + * before we start any other transactions. Trying to do this later + * is messy. We don't care to take a readlock to look at the ids + * in inode here, because we can't hold it across the trans_reserve. + * If the IDs do change before we take the ilock, we're covered + * because the i_*dquot fields will get updated anyway. + */ + if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) { + uint qflags = 0; + + if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) { + uid = iattr->ia_uid; + qflags |= XFS_QMOPT_UQUOTA; + } else { + uid = ip->i_d.di_uid; + } + if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { + gid = iattr->ia_gid; + qflags |= XFS_QMOPT_GQUOTA; + } else { + gid = ip->i_d.di_gid; + } + + /* + * We take a reference when we initialize udqp and gdqp, + * so it is important that we never blindly double trip on + * the same variable. See xfs_create() for an example. + */ + ASSERT(udqp == NULL); + ASSERT(gdqp == NULL); + error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), + qflags, &udqp, &gdqp); + if (error) + return error; + } + + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); + error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); + if (error) + goto out_dqrele; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + /* + * Change file ownership. Must be the owner or privileged. + */ + if (mask & (ATTR_UID|ATTR_GID)) { + /* + * These IDs could have changed since we last looked at them. + * But, we're assured that if the ownership did change + * while we didn't have the inode locked, inode's dquot(s) + * would have changed also. + */ + iuid = ip->i_d.di_uid; + igid = ip->i_d.di_gid; + gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; + uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; + + /* + * Do a quota reservation only if uid/gid is actually + * going to change. + */ + if (XFS_IS_QUOTA_RUNNING(mp) && + ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || + (XFS_IS_GQUOTA_ON(mp) && igid != gid))) { + ASSERT(tp); + error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, + capable(CAP_FOWNER) ? + XFS_QMOPT_FORCE_RES : 0); + if (error) /* out of quota */ + goto out_trans_cancel; + } + } + + xfs_trans_ijoin(tp, ip); + + /* + * Change file ownership. Must be the owner or privileged. + */ + if (mask & (ATTR_UID|ATTR_GID)) { + /* + * CAP_FSETID overrides the following restrictions: + * + * The set-user-ID and set-group-ID bits of a file will be + * cleared upon successful return from chown() + */ + if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && + !capable(CAP_FSETID)) + ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); + + /* + * Change the ownerships and register quota modifications + * in the transaction. + */ + if (iuid != uid) { + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) { + ASSERT(mask & ATTR_UID); + ASSERT(udqp); + olddquot1 = xfs_qm_vop_chown(tp, ip, + &ip->i_udquot, udqp); + } + ip->i_d.di_uid = uid; + inode->i_uid = uid; + } + if (igid != gid) { + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { + ASSERT(!XFS_IS_PQUOTA_ON(mp)); + ASSERT(mask & ATTR_GID); + ASSERT(gdqp); + olddquot2 = xfs_qm_vop_chown(tp, ip, + &ip->i_gdquot, gdqp); + } + ip->i_d.di_gid = gid; + inode->i_gid = gid; + } + } + + /* + * Change file access modes. + */ + if (mask & ATTR_MODE) { + umode_t mode = iattr->ia_mode; + + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + mode &= ~S_ISGID; + + ip->i_d.di_mode &= S_IFMT; + ip->i_d.di_mode |= mode & ~S_IFMT; + + inode->i_mode &= S_IFMT; + inode->i_mode |= mode & ~S_IFMT; + } + + /* + * Change file access or modified times. + */ + if (mask & ATTR_ATIME) { + inode->i_atime = iattr->ia_atime; + ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; + ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; + ip->i_update_core = 1; + } + if (mask & ATTR_CTIME) { + inode->i_ctime = iattr->ia_ctime; + ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; + ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; + ip->i_update_core = 1; + } + if (mask & ATTR_MTIME) { + inode->i_mtime = iattr->ia_mtime; + ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; + ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; + ip->i_update_core = 1; + } + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + XFS_STATS_INC(xs_ig_attrchg); + + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0); + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + /* + * Release any dquot(s) the inode had kept before chown. + */ + xfs_qm_dqrele(olddquot1); + xfs_qm_dqrele(olddquot2); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); + + if (error) + return XFS_ERROR(error); + + /* + * XXX(hch): Updating the ACL entries is not atomic vs the i_mode + * update. We could avoid this with linked transactions + * and passing down the transaction pointer all the way + * to attr_set. No previous user of the generic + * Posix ACL code seems to care about this issue either. + */ + if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { + error = -xfs_acl_chmod(inode); + if (error) + return XFS_ERROR(error); + } + + return 0; + +out_trans_cancel: + xfs_trans_cancel(tp, 0); + xfs_iunlock(ip, XFS_ILOCK_EXCL); +out_dqrele: + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); + return error; +} + +/* + * Truncate file. Must have write permission and not be a directory. + */ +int +xfs_setattr_size( + struct xfs_inode *ip, + struct iattr *iattr, + int flags) +{ + struct xfs_mount *mp = ip->i_mount; + struct inode *inode = VFS_I(ip); + int mask = iattr->ia_valid; + struct xfs_trans *tp; + int error; + uint lock_flags; + uint commit_flags = 0; + + trace_xfs_setattr(ip); + + if (mp->m_flags & XFS_MOUNT_RDONLY) + return XFS_ERROR(EROFS); + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + error = -inode_change_ok(inode, iattr); + if (error) + return XFS_ERROR(error); + + ASSERT(S_ISREG(ip->i_d.di_mode)); + ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| + ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID| + ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); + + lock_flags = XFS_ILOCK_EXCL; + if (!(flags & XFS_ATTR_NOLOCK)) + lock_flags |= XFS_IOLOCK_EXCL; + xfs_ilock(ip, lock_flags); + + /* + * Short circuit the truncate case for zero length files. + */ + if (iattr->ia_size == 0 && + ip->i_size == 0 && ip->i_d.di_nextents == 0) { + if (!(mask & (ATTR_CTIME|ATTR_MTIME))) + goto out_unlock; + + /* + * Use the regular setattr path to update the timestamps. + */ + xfs_iunlock(ip, lock_flags); + iattr->ia_valid &= ~ATTR_SIZE; + return xfs_setattr_nonsize(ip, iattr, 0); + } + + /* + * Make sure that the dquots are attached to the inode. + */ + error = xfs_qm_dqattach_locked(ip, 0); + if (error) + goto out_unlock; + + /* + * Now we can make the changes. Before we join the inode to the + * transaction, take care of the part of the truncation that must be + * done without the inode lock. This needs to be done before joining + * the inode to the transaction, because the inode cannot be unlocked + * once it is a part of the transaction. + */ + if (iattr->ia_size > ip->i_size) { + /* + * Do the first part of growing a file: zero any data in the + * last block that is beyond the old EOF. We need to do this + * before the inode is joined to the transaction to modify + * i_size. + */ + error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); + if (error) + goto out_unlock; + } + xfs_iunlock(ip, XFS_ILOCK_EXCL); + lock_flags &= ~XFS_ILOCK_EXCL; + + /* + * We are going to log the inode size change in this transaction so + * any previous writes that are beyond the on disk EOF and the new + * EOF that have not been written out need to be written here. If we + * do not write the data out, we expose ourselves to the null files + * problem. + * + * Only flush from the on disk size to the smaller of the in memory + * file size or the new size as that's the range we really care about + * here and prevents waiting for other data not within the range we + * care about here. + */ + if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) { + error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, + XBF_ASYNC, FI_NONE); + if (error) + goto out_unlock; + } + + /* + * Wait for all I/O to complete. + */ + xfs_ioend_wait(ip); + + error = -block_truncate_page(inode->i_mapping, iattr->ia_size, + xfs_get_blocks); + if (error) + goto out_unlock; + + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); + error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_ITRUNCATE_LOG_COUNT); + if (error) + goto out_trans_cancel; + + truncate_setsize(inode, iattr->ia_size); + + commit_flags = XFS_TRANS_RELEASE_LOG_RES; + lock_flags |= XFS_ILOCK_EXCL; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + xfs_trans_ijoin(tp, ip); + + /* + * Only change the c/mtime if we are changing the size or we are + * explicitly asked to change it. This handles the semantic difference + * between truncate() and ftruncate() as implemented in the VFS. + * + * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a + * special case where we need to update the times despite not having + * these flags set. For all other operations the VFS set these flags + * explicitly if it wants a timestamp update. + */ + if (iattr->ia_size != ip->i_size && + (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { + iattr->ia_ctime = iattr->ia_mtime = + current_fs_time(inode->i_sb); + mask |= ATTR_CTIME | ATTR_MTIME; + } + + if (iattr->ia_size > ip->i_size) { + ip->i_d.di_size = iattr->ia_size; + ip->i_size = iattr->ia_size; + } else if (iattr->ia_size <= ip->i_size || + (iattr->ia_size == 0 && ip->i_d.di_nextents)) { + error = xfs_itruncate_data(&tp, ip, iattr->ia_size); + if (error) + goto out_trans_abort; + + /* + * Truncated "down", so we're removing references to old data + * here - if we delay flushing for a long time, we expose + * ourselves unduly to the notorious NULL files problem. So, + * we mark this inode and flush it when the file is closed, + * and do not wait the usual (long) time for writeout. + */ + xfs_iflags_set(ip, XFS_ITRUNCATED); + } + + if (mask & ATTR_CTIME) { + inode->i_ctime = iattr->ia_ctime; + ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; + ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; + ip->i_update_core = 1; + } + if (mask & ATTR_MTIME) { + inode->i_mtime = iattr->ia_mtime; + ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; + ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; + ip->i_update_core = 1; + } + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + XFS_STATS_INC(xs_ig_attrchg); + + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(tp); + + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); +out_unlock: + if (lock_flags) + xfs_iunlock(ip, lock_flags); + return error; + +out_trans_abort: + commit_flags |= XFS_TRANS_ABORT; +out_trans_cancel: + xfs_trans_cancel(tp, commit_flags); + goto out_unlock; +} + +STATIC int +xfs_vn_setattr( + struct dentry *dentry, + struct iattr *iattr) +{ + if (iattr->ia_valid & ATTR_SIZE) + return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0); + return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0); +} + +#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) + +/* + * Call fiemap helper to fill in user data. + * Returns positive errors to xfs_getbmap. + */ +STATIC int +xfs_fiemap_format( + void **arg, + struct getbmapx *bmv, + int *full) +{ + int error; + struct fiemap_extent_info *fieinfo = *arg; + u32 fiemap_flags = 0; + u64 logical, physical, length; + + /* Do nothing for a hole */ + if (bmv->bmv_block == -1LL) + return 0; + + logical = BBTOB(bmv->bmv_offset); + physical = BBTOB(bmv->bmv_block); + length = BBTOB(bmv->bmv_length); + + if (bmv->bmv_oflags & BMV_OF_PREALLOC) + fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN; + else if (bmv->bmv_oflags & BMV_OF_DELALLOC) { + fiemap_flags |= FIEMAP_EXTENT_DELALLOC; + physical = 0; /* no block yet */ + } + if (bmv->bmv_oflags & BMV_OF_LAST) + fiemap_flags |= FIEMAP_EXTENT_LAST; + + error = fiemap_fill_next_extent(fieinfo, logical, physical, + length, fiemap_flags); + if (error > 0) { + error = 0; + *full = 1; /* user array now full */ + } + + return -error; +} + +STATIC int +xfs_vn_fiemap( + struct inode *inode, + struct fiemap_extent_info *fieinfo, + u64 start, + u64 length) +{ + xfs_inode_t *ip = XFS_I(inode); + struct getbmapx bm; + int error; + + error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS); + if (error) + return error; + + /* Set up bmap header for xfs internal routine */ + bm.bmv_offset = BTOBB(start); + /* Special case for whole file */ + if (length == FIEMAP_MAX_OFFSET) + bm.bmv_length = -1LL; + else + bm.bmv_length = BTOBB(length); + + /* We add one because in getbmap world count includes the header */ + bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM : + fieinfo->fi_extents_max + 1; + bm.bmv_count = min_t(__s32, bm.bmv_count, + (PAGE_SIZE * 16 / sizeof(struct getbmapx))); + bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES; + if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) + bm.bmv_iflags |= BMV_IF_ATTRFORK; + if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC)) + bm.bmv_iflags |= BMV_IF_DELALLOC; + + error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo); + if (error) + return -error; + + return 0; +} + +static const struct inode_operations xfs_inode_operations = { + .get_acl = xfs_get_acl, + .getattr = xfs_vn_getattr, + .setattr = xfs_vn_setattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, + .listxattr = xfs_vn_listxattr, + .fiemap = xfs_vn_fiemap, +}; + +static const struct inode_operations xfs_dir_inode_operations = { + .create = xfs_vn_create, + .lookup = xfs_vn_lookup, + .link = xfs_vn_link, + .unlink = xfs_vn_unlink, + .symlink = xfs_vn_symlink, + .mkdir = xfs_vn_mkdir, + /* + * Yes, XFS uses the same method for rmdir and unlink. + * + * There are some subtile differences deeper in the code, + * but we use S_ISDIR to check for those. + */ + .rmdir = xfs_vn_unlink, + .mknod = xfs_vn_mknod, + .rename = xfs_vn_rename, + .get_acl = xfs_get_acl, + .getattr = xfs_vn_getattr, + .setattr = xfs_vn_setattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, + .listxattr = xfs_vn_listxattr, +}; + +static const struct inode_operations xfs_dir_ci_inode_operations = { + .create = xfs_vn_create, + .lookup = xfs_vn_ci_lookup, + .link = xfs_vn_link, + .unlink = xfs_vn_unlink, + .symlink = xfs_vn_symlink, + .mkdir = xfs_vn_mkdir, + /* + * Yes, XFS uses the same method for rmdir and unlink. + * + * There are some subtile differences deeper in the code, + * but we use S_ISDIR to check for those. + */ + .rmdir = xfs_vn_unlink, + .mknod = xfs_vn_mknod, + .rename = xfs_vn_rename, + .get_acl = xfs_get_acl, + .getattr = xfs_vn_getattr, + .setattr = xfs_vn_setattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, + .listxattr = xfs_vn_listxattr, +}; + +static const struct inode_operations xfs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = xfs_vn_follow_link, + .put_link = xfs_vn_put_link, + .get_acl = xfs_get_acl, + .getattr = xfs_vn_getattr, + .setattr = xfs_vn_setattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, + .listxattr = xfs_vn_listxattr, +}; + +STATIC void +xfs_diflags_to_iflags( + struct inode *inode, + struct xfs_inode *ip) +{ + if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; + if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) + inode->i_flags |= S_APPEND; + else + inode->i_flags &= ~S_APPEND; + if (ip->i_d.di_flags & XFS_DIFLAG_SYNC) + inode->i_flags |= S_SYNC; + else + inode->i_flags &= ~S_SYNC; + if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME) + inode->i_flags |= S_NOATIME; + else + inode->i_flags &= ~S_NOATIME; +} + +/* + * Initialize the Linux inode, set up the operation vectors and + * unlock the inode. + * + * When reading existing inodes from disk this is called directly + * from xfs_iget, when creating a new inode it is called from + * xfs_ialloc after setting up the inode. + * + * We are always called with an uninitialised linux inode here. + * We need to initialise the necessary fields and take a reference + * on it. + */ +void +xfs_setup_inode( + struct xfs_inode *ip) +{ + struct inode *inode = &ip->i_vnode; + + inode->i_ino = ip->i_ino; + inode->i_state = I_NEW; + + inode_sb_list_add(inode); + /* make the inode look hashed for the writeback code */ + hlist_add_fake(&inode->i_hash); + + inode->i_mode = ip->i_d.di_mode; + inode->i_nlink = ip->i_d.di_nlink; + inode->i_uid = ip->i_d.di_uid; + inode->i_gid = ip->i_d.di_gid; + + switch (inode->i_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + inode->i_rdev = + MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, + sysv_minor(ip->i_df.if_u2.if_rdev)); + break; + default: + inode->i_rdev = 0; + break; + } + + inode->i_generation = ip->i_d.di_gen; + i_size_write(inode, ip->i_d.di_size); + inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec; + inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec; + inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; + inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; + inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; + inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; + xfs_diflags_to_iflags(inode, ip); + + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + inode->i_op = &xfs_inode_operations; + inode->i_fop = &xfs_file_operations; + inode->i_mapping->a_ops = &xfs_address_space_operations; + break; + case S_IFDIR: + if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) + inode->i_op = &xfs_dir_ci_inode_operations; + else + inode->i_op = &xfs_dir_inode_operations; + inode->i_fop = &xfs_dir_file_operations; + break; + case S_IFLNK: + inode->i_op = &xfs_symlink_inode_operations; + if (!(ip->i_df.if_flags & XFS_IFINLINE)) + inode->i_mapping->a_ops = &xfs_address_space_operations; + break; + default: + inode->i_op = &xfs_inode_operations; + init_special_inode(inode, inode->i_mode, inode->i_rdev); + break; + } + + /* + * If there is no attribute fork no ACL can exist on this inode, + * and it can't have any file capabilities attached to it either. + */ + if (!XFS_IFORK_Q(ip)) { + inode_has_no_xattr(inode); + cache_no_acl(inode); + } + + xfs_iflags_clear(ip, XFS_INEW); + barrier(); + + unlock_new_inode(inode); +} diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h new file mode 100644 index 0000000..ef41c92 --- /dev/null +++ b/fs/xfs/xfs_iops.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_IOPS_H__ +#define __XFS_IOPS_H__ + +struct xfs_inode; + +extern const struct file_operations xfs_file_operations; +extern const struct file_operations xfs_dir_file_operations; + +extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); + +extern void xfs_setup_inode(struct xfs_inode *); + +#endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h new file mode 100644 index 0000000..1e8a45e --- /dev/null +++ b/fs/xfs/xfs_linux.h @@ -0,0 +1,309 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_LINUX__ +#define __XFS_LINUX__ + +#include + +/* + * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. + * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set. + */ +#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64) +# define XFS_BIG_BLKNOS 1 +# define XFS_BIG_INUMS 1 +#else +# define XFS_BIG_BLKNOS 0 +# define XFS_BIG_INUMS 0 +#endif + +#include "xfs_types.h" + +#include "kmem.h" +#include "mrlock.h" +#include "time.h" +#include "uuid.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "xfs_vnode.h" +#include "xfs_stats.h" +#include "xfs_sysctl.h" +#include "xfs_iops.h" +#include "xfs_aops.h" +#include "xfs_super.h" +#include "xfs_buf.h" +#include "xfs_message.h" + +#ifdef __BIG_ENDIAN +#define XFS_NATIVE_HOST 1 +#else +#undef XFS_NATIVE_HOST +#endif + +/* + * Feature macros (disable/enable) + */ +#ifdef CONFIG_SMP +#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ +#else +#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ +#endif + +#define irix_sgid_inherit xfs_params.sgid_inherit.val +#define irix_symlink_mode xfs_params.symlink_mode.val +#define xfs_panic_mask xfs_params.panic_mask.val +#define xfs_error_level xfs_params.error_level.val +#define xfs_syncd_centisecs xfs_params.syncd_timer.val +#define xfs_stats_clear xfs_params.stats_clear.val +#define xfs_inherit_sync xfs_params.inherit_sync.val +#define xfs_inherit_nodump xfs_params.inherit_nodump.val +#define xfs_inherit_noatime xfs_params.inherit_noatim.val +#define xfs_buf_timer_centisecs xfs_params.xfs_buf_timer.val +#define xfs_buf_age_centisecs xfs_params.xfs_buf_age.val +#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val +#define xfs_rotorstep xfs_params.rotorstep.val +#define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val +#define xfs_fstrm_centisecs xfs_params.fstrm_timer.val + +#define current_cpu() (raw_smp_processor_id()) +#define current_pid() (current->pid) +#define current_test_flags(f) (current->flags & (f)) +#define current_set_flags_nested(sp, f) \ + (*(sp) = current->flags, current->flags |= (f)) +#define current_clear_flags_nested(sp, f) \ + (*(sp) = current->flags, current->flags &= ~(f)) +#define current_restore_flags_nested(sp, f) \ + (current->flags = ((current->flags & ~(f)) | (*(sp) & (f)))) + +#define spinlock_destroy(lock) + +#define NBBY 8 /* number of bits per byte */ + +/* + * Size of block device i/o is parameterized here. + * Currently the system supports page-sized i/o. + */ +#define BLKDEV_IOSHIFT PAGE_CACHE_SHIFT +#define BLKDEV_IOSIZE (1<> 32; + __low = c; + if (__high) { + __upper = __high % (b); + __high = __high / (b); + } + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); + asm("":"=A" (c):"a" (__low),"d" (__high)); + *(__u64 *)a = c; + return __mod; + } + } + + /* NOTREACHED */ + return 0; +} + +/* Side effect free 64 bit mod operation */ +static inline __u32 xfs_do_mod(void *a, __u32 b, int n) +{ + switch (n) { + case 4: + return *(__u32 *)a % b; + case 8: + { + unsigned long __upper, __low, __high, __mod; + __u64 c = *(__u64 *)a; + __upper = __high = c >> 32; + __low = c; + if (__high) { + __upper = __high % (b); + __high = __high / (b); + } + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); + asm("":"=A" (c):"a" (__low),"d" (__high)); + return __mod; + } + } + + /* NOTREACHED */ + return 0; +} +#else +static inline __u32 xfs_do_div(void *a, __u32 b, int n) +{ + __u32 mod; + + switch (n) { + case 4: + mod = *(__u32 *)a % b; + *(__u32 *)a = *(__u32 *)a / b; + return mod; + case 8: + mod = do_div(*(__u64 *)a, b); + return mod; + } + + /* NOTREACHED */ + return 0; +} + +/* Side effect free 64 bit mod operation */ +static inline __u32 xfs_do_mod(void *a, __u32 b, int n) +{ + switch (n) { + case 4: + return *(__u32 *)a % b; + case 8: + { + __u64 c = *(__u64 *)a; + return do_div(c, b); + } + } + + /* NOTREACHED */ + return 0; +} +#endif + +#undef do_div +#define do_div(a, b) xfs_do_div(&(a), (b), sizeof(a)) +#define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a)) + +static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y) +{ + x += y - 1; + do_div(x, y); + return(x * y); +} + +static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) +{ + x += y - 1; + do_div(x, y); + return x; +} + +/* ARM old ABI has some weird alignment/padding */ +#if defined(__arm__) && !defined(__ARM_EABI__) +#define __arch_pack __attribute__((packed)) +#else +#define __arch_pack +#endif + +#define ASSERT_ALWAYS(expr) \ + (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) + +#ifndef DEBUG +#define ASSERT(expr) ((void)0) + +#ifndef STATIC +# define STATIC static noinline +#endif + +#else /* DEBUG */ + +#define ASSERT(expr) \ + (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) + +#ifndef STATIC +# define STATIC noinline +#endif + +#endif /* DEBUG */ + +#endif /* __XFS_LINUX__ */ diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c new file mode 100644 index 0000000..bd672de --- /dev/null +++ b/fs/xfs/xfs_message.c @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2011 Red Hat, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" + +/* + * XFS logging functions + */ +static void +__xfs_printk( + const char *level, + const struct xfs_mount *mp, + struct va_format *vaf) +{ + if (mp && mp->m_fsname) { + printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf); + return; + } + printk("%sXFS: %pV\n", level, vaf); +} + +#define define_xfs_printk_level(func, kern_level) \ +void func(const struct xfs_mount *mp, const char *fmt, ...) \ +{ \ + struct va_format vaf; \ + va_list args; \ + \ + va_start(args, fmt); \ + \ + vaf.fmt = fmt; \ + vaf.va = &args; \ + \ + __xfs_printk(kern_level, mp, &vaf); \ + va_end(args); \ +} \ + +define_xfs_printk_level(xfs_emerg, KERN_EMERG); +define_xfs_printk_level(xfs_alert, KERN_ALERT); +define_xfs_printk_level(xfs_crit, KERN_CRIT); +define_xfs_printk_level(xfs_err, KERN_ERR); +define_xfs_printk_level(xfs_warn, KERN_WARNING); +define_xfs_printk_level(xfs_notice, KERN_NOTICE); +define_xfs_printk_level(xfs_info, KERN_INFO); +#ifdef DEBUG +define_xfs_printk_level(xfs_debug, KERN_DEBUG); +#endif + +void +xfs_alert_tag( + const struct xfs_mount *mp, + int panic_tag, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + int do_panic = 0; + + if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { + xfs_alert(mp, "Transforming an alert into a BUG."); + do_panic = 1; + } + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + __xfs_printk(KERN_ALERT, mp, &vaf); + va_end(args); + + BUG_ON(do_panic); +} + +void +assfail(char *expr, char *file, int line) +{ + xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d", + expr, file, line); + BUG(); +} + +void +xfs_hex_dump(void *p, int length) +{ + print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1); +} diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h new file mode 100644 index 0000000..7fb7ea0 --- /dev/null +++ b/fs/xfs/xfs_message.h @@ -0,0 +1,39 @@ +#ifndef __XFS_MESSAGE_H +#define __XFS_MESSAGE_H 1 + +struct xfs_mount; + +extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern void xfs_alert_tag(const struct xfs_mount *mp, int tag, + const char *fmt, ...) + __attribute__ ((format (printf, 3, 4))); +extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); + +#ifdef DEBUG +extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +#else +static inline void +__attribute__ ((format (printf, 2, 3))) +xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) +{ +} +#endif + +extern void assfail(char *expr, char *f, int l); + +extern void xfs_hex_dump(void *p, int length); + +#endif /* __XFS_MESSAGE_H */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c new file mode 100644 index 0000000..9a0aa76 --- /dev/null +++ b/fs/xfs/xfs_qm.c @@ -0,0 +1,2416 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_ialloc.h" +#include "xfs_itable.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_bmap.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_trans_space.h" +#include "xfs_utils.h" +#include "xfs_qm.h" +#include "xfs_trace.h" + +/* + * The global quota manager. There is only one of these for the entire + * system, _not_ one per file system. XQM keeps track of the overall + * quota functionality, including maintaining the freelist and hash + * tables of dquots. + */ +struct mutex xfs_Gqm_lock; +struct xfs_qm *xfs_Gqm; +uint ndquot; + +kmem_zone_t *qm_dqzone; +kmem_zone_t *qm_dqtrxzone; + +STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); +STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); + +STATIC int xfs_qm_init_quotainos(xfs_mount_t *); +STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); +STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); + +static struct shrinker xfs_qm_shaker = { + .shrink = xfs_qm_shake, + .seeks = DEFAULT_SEEKS, +}; + +/* + * Initialize the XQM structure. + * Note that there is not one quota manager per file system. + */ +STATIC struct xfs_qm * +xfs_Gqm_init(void) +{ + xfs_dqhash_t *udqhash, *gdqhash; + xfs_qm_t *xqm; + size_t hsize; + uint i; + + /* + * Initialize the dquot hash tables. + */ + udqhash = kmem_zalloc_greedy(&hsize, + XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t), + XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t)); + if (!udqhash) + goto out; + + gdqhash = kmem_zalloc_large(hsize); + if (!gdqhash) + goto out_free_udqhash; + + hsize /= sizeof(xfs_dqhash_t); + ndquot = hsize << 8; + + xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP); + xqm->qm_dqhashmask = hsize - 1; + xqm->qm_usr_dqhtable = udqhash; + xqm->qm_grp_dqhtable = gdqhash; + ASSERT(xqm->qm_usr_dqhtable != NULL); + ASSERT(xqm->qm_grp_dqhtable != NULL); + + for (i = 0; i < hsize; i++) { + xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i); + xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i); + } + + /* + * Freelist of all dquots of all file systems + */ + INIT_LIST_HEAD(&xqm->qm_dqfrlist); + xqm->qm_dqfrlist_cnt = 0; + mutex_init(&xqm->qm_dqfrlist_lock); + + /* + * dquot zone. we register our own low-memory callback. + */ + if (!qm_dqzone) { + xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t), + "xfs_dquots"); + qm_dqzone = xqm->qm_dqzone; + } else + xqm->qm_dqzone = qm_dqzone; + + register_shrinker(&xfs_qm_shaker); + + /* + * The t_dqinfo portion of transactions. + */ + if (!qm_dqtrxzone) { + xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t), + "xfs_dqtrx"); + qm_dqtrxzone = xqm->qm_dqtrxzone; + } else + xqm->qm_dqtrxzone = qm_dqtrxzone; + + atomic_set(&xqm->qm_totaldquots, 0); + xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO; + xqm->qm_nrefs = 0; + return xqm; + + out_free_udqhash: + kmem_free_large(udqhash); + out: + return NULL; +} + +/* + * Destroy the global quota manager when its reference count goes to zero. + */ +STATIC void +xfs_qm_destroy( + struct xfs_qm *xqm) +{ + struct xfs_dquot *dqp, *n; + int hsize, i; + + ASSERT(xqm != NULL); + ASSERT(xqm->qm_nrefs == 0); + unregister_shrinker(&xfs_qm_shaker); + hsize = xqm->qm_dqhashmask + 1; + for (i = 0; i < hsize; i++) { + xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); + xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); + } + kmem_free_large(xqm->qm_usr_dqhtable); + kmem_free_large(xqm->qm_grp_dqhtable); + xqm->qm_usr_dqhtable = NULL; + xqm->qm_grp_dqhtable = NULL; + xqm->qm_dqhashmask = 0; + + /* frlist cleanup */ + mutex_lock(&xqm->qm_dqfrlist_lock); + list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) { + xfs_dqlock(dqp); + list_del_init(&dqp->q_freelist); + xfs_Gqm->qm_dqfrlist_cnt--; + xfs_dqunlock(dqp); + xfs_qm_dqdestroy(dqp); + } + mutex_unlock(&xqm->qm_dqfrlist_lock); + mutex_destroy(&xqm->qm_dqfrlist_lock); + kmem_free(xqm); +} + +/* + * Called at mount time to let XQM know that another file system is + * starting quotas. This isn't crucial information as the individual mount + * structures are pretty independent, but it helps the XQM keep a + * global view of what's going on. + */ +/* ARGSUSED */ +STATIC int +xfs_qm_hold_quotafs_ref( + struct xfs_mount *mp) +{ + /* + * Need to lock the xfs_Gqm structure for things like this. For example, + * the structure could disappear between the entry to this routine and + * a HOLD operation if not locked. + */ + mutex_lock(&xfs_Gqm_lock); + + if (!xfs_Gqm) { + xfs_Gqm = xfs_Gqm_init(); + if (!xfs_Gqm) { + mutex_unlock(&xfs_Gqm_lock); + return ENOMEM; + } + } + + /* + * We can keep a list of all filesystems with quotas mounted for + * debugging and statistical purposes, but ... + * Just take a reference and get out. + */ + xfs_Gqm->qm_nrefs++; + mutex_unlock(&xfs_Gqm_lock); + + return 0; +} + + +/* + * Release the reference that a filesystem took at mount time, + * so that we know when we need to destroy the entire quota manager. + */ +/* ARGSUSED */ +STATIC void +xfs_qm_rele_quotafs_ref( + struct xfs_mount *mp) +{ + xfs_dquot_t *dqp, *n; + + ASSERT(xfs_Gqm); + ASSERT(xfs_Gqm->qm_nrefs > 0); + + /* + * Go thru the freelist and destroy all inactive dquots. + */ + mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); + + list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) { + xfs_dqlock(dqp); + if (dqp->dq_flags & XFS_DQ_INACTIVE) { + ASSERT(dqp->q_mount == NULL); + ASSERT(! XFS_DQ_IS_DIRTY(dqp)); + ASSERT(list_empty(&dqp->q_hashlist)); + ASSERT(list_empty(&dqp->q_mplist)); + list_del_init(&dqp->q_freelist); + xfs_Gqm->qm_dqfrlist_cnt--; + xfs_dqunlock(dqp); + xfs_qm_dqdestroy(dqp); + } else { + xfs_dqunlock(dqp); + } + } + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); + + /* + * Destroy the entire XQM. If somebody mounts with quotaon, this'll + * be restarted. + */ + mutex_lock(&xfs_Gqm_lock); + if (--xfs_Gqm->qm_nrefs == 0) { + xfs_qm_destroy(xfs_Gqm); + xfs_Gqm = NULL; + } + mutex_unlock(&xfs_Gqm_lock); +} + +/* + * Just destroy the quotainfo structure. + */ +void +xfs_qm_unmount( + struct xfs_mount *mp) +{ + if (mp->m_quotainfo) { + xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL); + xfs_qm_destroy_quotainfo(mp); + } +} + + +/* + * This is called from xfs_mountfs to start quotas and initialize all + * necessary data structures like quotainfo. This is also responsible for + * running a quotacheck as necessary. We are guaranteed that the superblock + * is consistently read in at this point. + * + * If we fail here, the mount will continue with quota turned off. We don't + * need to inidicate success or failure at all. + */ +void +xfs_qm_mount_quotas( + xfs_mount_t *mp) +{ + int error = 0; + uint sbf; + + /* + * If quotas on realtime volumes is not supported, we disable + * quotas immediately. + */ + if (mp->m_sb.sb_rextents) { + xfs_notice(mp, "Cannot turn on quotas for realtime filesystem"); + mp->m_qflags = 0; + goto write_changes; + } + + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + /* + * Allocate the quotainfo structure inside the mount struct, and + * create quotainode(s), and change/rev superblock if necessary. + */ + error = xfs_qm_init_quotainfo(mp); + if (error) { + /* + * We must turn off quotas. + */ + ASSERT(mp->m_quotainfo == NULL); + mp->m_qflags = 0; + goto write_changes; + } + /* + * If any of the quotas are not consistent, do a quotacheck. + */ + if (XFS_QM_NEED_QUOTACHECK(mp)) { + error = xfs_qm_quotacheck(mp); + if (error) { + /* Quotacheck failed and disabled quotas. */ + return; + } + } + /* + * If one type of quotas is off, then it will lose its + * quotachecked status, since we won't be doing accounting for + * that type anymore. + */ + if (!XFS_IS_UQUOTA_ON(mp)) + mp->m_qflags &= ~XFS_UQUOTA_CHKD; + if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) + mp->m_qflags &= ~XFS_OQUOTA_CHKD; + + write_changes: + /* + * We actually don't have to acquire the m_sb_lock at all. + * This can only be called from mount, and that's single threaded. XXX + */ + spin_lock(&mp->m_sb_lock); + sbf = mp->m_sb.sb_qflags; + mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL; + spin_unlock(&mp->m_sb_lock); + + if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { + if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) { + /* + * We could only have been turning quotas off. + * We aren't in very good shape actually because + * the incore structures are convinced that quotas are + * off, but the on disk superblock doesn't know that ! + */ + ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); + xfs_alert(mp, "%s: Superblock update failed!", + __func__); + } + } + + if (error) { + xfs_warn(mp, "Failed to initialize disk quotas."); + return; + } +} + +/* + * Called from the vfsops layer. + */ +void +xfs_qm_unmount_quotas( + xfs_mount_t *mp) +{ + /* + * Release the dquots that root inode, et al might be holding, + * before we flush quotas and blow away the quotainfo structure. + */ + ASSERT(mp->m_rootip); + xfs_qm_dqdetach(mp->m_rootip); + if (mp->m_rbmip) + xfs_qm_dqdetach(mp->m_rbmip); + if (mp->m_rsumip) + xfs_qm_dqdetach(mp->m_rsumip); + + /* + * Release the quota inodes. + */ + if (mp->m_quotainfo) { + if (mp->m_quotainfo->qi_uquotaip) { + IRELE(mp->m_quotainfo->qi_uquotaip); + mp->m_quotainfo->qi_uquotaip = NULL; + } + if (mp->m_quotainfo->qi_gquotaip) { + IRELE(mp->m_quotainfo->qi_gquotaip); + mp->m_quotainfo->qi_gquotaip = NULL; + } + } +} + +/* + * Flush all dquots of the given file system to disk. The dquots are + * _not_ purged from memory here, just their data written to disk. + */ +STATIC int +xfs_qm_dqflush_all( + struct xfs_mount *mp, + int sync_mode) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + int recl; + struct xfs_dquot *dqp; + int error; + + if (!q) + return 0; +again: + mutex_lock(&q->qi_dqlist_lock); + list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { + xfs_dqlock(dqp); + if (! XFS_DQ_IS_DIRTY(dqp)) { + xfs_dqunlock(dqp); + continue; + } + + /* XXX a sentinel would be better */ + recl = q->qi_dqreclaims; + if (!xfs_dqflock_nowait(dqp)) { + /* + * If we can't grab the flush lock then check + * to see if the dquot has been flushed delayed + * write. If so, grab its buffer and send it + * out immediately. We'll be able to acquire + * the flush lock when the I/O completes. + */ + xfs_qm_dqflock_pushbuf_wait(dqp); + } + /* + * Let go of the mplist lock. We don't want to hold it + * across a disk write. + */ + mutex_unlock(&q->qi_dqlist_lock); + error = xfs_qm_dqflush(dqp, sync_mode); + xfs_dqunlock(dqp); + if (error) + return error; + + mutex_lock(&q->qi_dqlist_lock); + if (recl != q->qi_dqreclaims) { + mutex_unlock(&q->qi_dqlist_lock); + /* XXX restart limit */ + goto again; + } + } + + mutex_unlock(&q->qi_dqlist_lock); + /* return ! busy */ + return 0; +} +/* + * Release the group dquot pointers the user dquots may be + * carrying around as a hint. mplist is locked on entry and exit. + */ +STATIC void +xfs_qm_detach_gdquots( + struct xfs_mount *mp) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + struct xfs_dquot *dqp, *gdqp; + int nrecl; + + again: + ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); + list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { + xfs_dqlock(dqp); + if ((gdqp = dqp->q_gdquot)) { + xfs_dqlock(gdqp); + dqp->q_gdquot = NULL; + } + xfs_dqunlock(dqp); + + if (gdqp) { + /* + * Can't hold the mplist lock across a dqput. + * XXXmust convert to marker based iterations here. + */ + nrecl = q->qi_dqreclaims; + mutex_unlock(&q->qi_dqlist_lock); + xfs_qm_dqput(gdqp); + + mutex_lock(&q->qi_dqlist_lock); + if (nrecl != q->qi_dqreclaims) + goto again; + } + } +} + +/* + * Go through all the incore dquots of this file system and take them + * off the mplist and hashlist, if the dquot type matches the dqtype + * parameter. This is used when turning off quota accounting for + * users and/or groups, as well as when the filesystem is unmounting. + */ +STATIC int +xfs_qm_dqpurge_int( + struct xfs_mount *mp, + uint flags) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + struct xfs_dquot *dqp, *n; + uint dqtype; + int nrecl; + int nmisses; + + if (!q) + return 0; + + dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0; + dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0; + dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0; + + mutex_lock(&q->qi_dqlist_lock); + + /* + * In the first pass through all incore dquots of this filesystem, + * we release the group dquot pointers the user dquots may be + * carrying around as a hint. We need to do this irrespective of + * what's being turned off. + */ + xfs_qm_detach_gdquots(mp); + + again: + nmisses = 0; + ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); + /* + * Try to get rid of all of the unwanted dquots. The idea is to + * get them off mplist and hashlist, but leave them on freelist. + */ + list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) { + /* + * It's OK to look at the type without taking dqlock here. + * We're holding the mplist lock here, and that's needed for + * a dqreclaim. + */ + if ((dqp->dq_flags & dqtype) == 0) + continue; + + if (!mutex_trylock(&dqp->q_hash->qh_lock)) { + nrecl = q->qi_dqreclaims; + mutex_unlock(&q->qi_dqlist_lock); + mutex_lock(&dqp->q_hash->qh_lock); + mutex_lock(&q->qi_dqlist_lock); + + /* + * XXXTheoretically, we can get into a very long + * ping pong game here. + * No one can be adding dquots to the mplist at + * this point, but somebody might be taking things off. + */ + if (nrecl != q->qi_dqreclaims) { + mutex_unlock(&dqp->q_hash->qh_lock); + goto again; + } + } + + /* + * Take the dquot off the mplist and hashlist. It may remain on + * freelist in INACTIVE state. + */ + nmisses += xfs_qm_dqpurge(dqp); + } + mutex_unlock(&q->qi_dqlist_lock); + return nmisses; +} + +int +xfs_qm_dqpurge_all( + xfs_mount_t *mp, + uint flags) +{ + int ndquots; + + /* + * Purge the dquot cache. + * None of the dquots should really be busy at this point. + */ + if (mp->m_quotainfo) { + while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) { + delay(ndquots * 10); + } + } + return 0; +} + +STATIC int +xfs_qm_dqattach_one( + xfs_inode_t *ip, + xfs_dqid_t id, + uint type, + uint doalloc, + xfs_dquot_t *udqhint, /* hint */ + xfs_dquot_t **IO_idqpp) +{ + xfs_dquot_t *dqp; + int error; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + error = 0; + + /* + * See if we already have it in the inode itself. IO_idqpp is + * &i_udquot or &i_gdquot. This made the code look weird, but + * made the logic a lot simpler. + */ + dqp = *IO_idqpp; + if (dqp) { + trace_xfs_dqattach_found(dqp); + return 0; + } + + /* + * udqhint is the i_udquot field in inode, and is non-NULL only + * when the type arg is group/project. Its purpose is to save a + * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside + * the user dquot. + */ + if (udqhint) { + ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); + xfs_dqlock(udqhint); + + /* + * No need to take dqlock to look at the id. + * + * The ID can't change until it gets reclaimed, and it won't + * be reclaimed as long as we have a ref from inode and we + * hold the ilock. + */ + dqp = udqhint->q_gdquot; + if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) { + xfs_dqlock(dqp); + XFS_DQHOLD(dqp); + ASSERT(*IO_idqpp == NULL); + *IO_idqpp = dqp; + + xfs_dqunlock(dqp); + xfs_dqunlock(udqhint); + return 0; + } + + /* + * We can't hold a dquot lock when we call the dqget code. + * We'll deadlock in no time, because of (not conforming to) + * lock ordering - the inodelock comes before any dquot lock, + * and we may drop and reacquire the ilock in xfs_qm_dqget(). + */ + xfs_dqunlock(udqhint); + } + + /* + * Find the dquot from somewhere. This bumps the + * reference count of dquot and returns it locked. + * This can return ENOENT if dquot didn't exist on + * disk and we didn't ask it to allocate; + * ESRCH if quotas got turned off suddenly. + */ + error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp); + if (error) + return error; + + trace_xfs_dqattach_get(dqp); + + /* + * dqget may have dropped and re-acquired the ilock, but it guarantees + * that the dquot returned is the one that should go in the inode. + */ + *IO_idqpp = dqp; + xfs_dqunlock(dqp); + return 0; +} + + +/* + * Given a udquot and gdquot, attach a ptr to the group dquot in the + * udquot as a hint for future lookups. The idea sounds simple, but the + * execution isn't, because the udquot might have a group dquot attached + * already and getting rid of that gets us into lock ordering constraints. + * The process is complicated more by the fact that the dquots may or may not + * be locked on entry. + */ +STATIC void +xfs_qm_dqattach_grouphint( + xfs_dquot_t *udq, + xfs_dquot_t *gdq) +{ + xfs_dquot_t *tmp; + + xfs_dqlock(udq); + + if ((tmp = udq->q_gdquot)) { + if (tmp == gdq) { + xfs_dqunlock(udq); + return; + } + + udq->q_gdquot = NULL; + /* + * We can't keep any dqlocks when calling dqrele, + * because the freelist lock comes before dqlocks. + */ + xfs_dqunlock(udq); + /* + * we took a hard reference once upon a time in dqget, + * so give it back when the udquot no longer points at it + * dqput() does the unlocking of the dquot. + */ + xfs_qm_dqrele(tmp); + + xfs_dqlock(udq); + xfs_dqlock(gdq); + + } else { + ASSERT(XFS_DQ_IS_LOCKED(udq)); + xfs_dqlock(gdq); + } + + ASSERT(XFS_DQ_IS_LOCKED(udq)); + ASSERT(XFS_DQ_IS_LOCKED(gdq)); + /* + * Somebody could have attached a gdquot here, + * when we dropped the uqlock. If so, just do nothing. + */ + if (udq->q_gdquot == NULL) { + XFS_DQHOLD(gdq); + udq->q_gdquot = gdq; + } + + xfs_dqunlock(gdq); + xfs_dqunlock(udq); +} + + +/* + * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON + * into account. + * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed. + * Inode may get unlocked and relocked in here, and the caller must deal with + * the consequences. + */ +int +xfs_qm_dqattach_locked( + xfs_inode_t *ip, + uint flags) +{ + xfs_mount_t *mp = ip->i_mount; + uint nquotas = 0; + int error = 0; + + if (!XFS_IS_QUOTA_RUNNING(mp) || + !XFS_IS_QUOTA_ON(mp) || + !XFS_NOT_DQATTACHED(mp, ip) || + ip->i_ino == mp->m_sb.sb_uquotino || + ip->i_ino == mp->m_sb.sb_gquotino) + return 0; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + if (XFS_IS_UQUOTA_ON(mp)) { + error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, + flags & XFS_QMOPT_DQALLOC, + NULL, &ip->i_udquot); + if (error) + goto done; + nquotas++; + } + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + if (XFS_IS_OQUOTA_ON(mp)) { + error = XFS_IS_GQUOTA_ON(mp) ? + xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, + flags & XFS_QMOPT_DQALLOC, + ip->i_udquot, &ip->i_gdquot) : + xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, + flags & XFS_QMOPT_DQALLOC, + ip->i_udquot, &ip->i_gdquot); + /* + * Don't worry about the udquot that we may have + * attached above. It'll get detached, if not already. + */ + if (error) + goto done; + nquotas++; + } + + /* + * Attach this group quota to the user quota as a hint. + * This WON'T, in general, result in a thrash. + */ + if (nquotas == 2) { + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(ip->i_udquot); + ASSERT(ip->i_gdquot); + + /* + * We may or may not have the i_udquot locked at this point, + * but this check is OK since we don't depend on the i_gdquot to + * be accurate 100% all the time. It is just a hint, and this + * will succeed in general. + */ + if (ip->i_udquot->q_gdquot == ip->i_gdquot) + goto done; + /* + * Attach i_gdquot to the gdquot hint inside the i_udquot. + */ + xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); + } + + done: +#ifdef DEBUG + if (!error) { + if (XFS_IS_UQUOTA_ON(mp)) + ASSERT(ip->i_udquot); + if (XFS_IS_OQUOTA_ON(mp)) + ASSERT(ip->i_gdquot); + } + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); +#endif + return error; +} + +int +xfs_qm_dqattach( + struct xfs_inode *ip, + uint flags) +{ + int error; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + error = xfs_qm_dqattach_locked(ip, flags); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + return error; +} + +/* + * Release dquots (and their references) if any. + * The inode should be locked EXCL except when this's called by + * xfs_ireclaim. + */ +void +xfs_qm_dqdetach( + xfs_inode_t *ip) +{ + if (!(ip->i_udquot || ip->i_gdquot)) + return; + + trace_xfs_dquot_dqdetach(ip); + + ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino); + ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino); + if (ip->i_udquot) { + xfs_qm_dqrele(ip->i_udquot); + ip->i_udquot = NULL; + } + if (ip->i_gdquot) { + xfs_qm_dqrele(ip->i_gdquot); + ip->i_gdquot = NULL; + } +} + +int +xfs_qm_sync( + struct xfs_mount *mp, + int flags) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + int recl, restarts; + struct xfs_dquot *dqp; + int error; + + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + return 0; + + restarts = 0; + + again: + mutex_lock(&q->qi_dqlist_lock); + /* + * dqpurge_all() also takes the mplist lock and iterate thru all dquots + * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared + * when we have the mplist lock, we know that dquots will be consistent + * as long as we have it locked. + */ + if (!XFS_IS_QUOTA_ON(mp)) { + mutex_unlock(&q->qi_dqlist_lock); + return 0; + } + ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); + list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { + /* + * If this is vfs_sync calling, then skip the dquots that + * don't 'seem' to be dirty. ie. don't acquire dqlock. + * This is very similar to what xfs_sync does with inodes. + */ + if (flags & SYNC_TRYLOCK) { + if (!XFS_DQ_IS_DIRTY(dqp)) + continue; + if (!xfs_qm_dqlock_nowait(dqp)) + continue; + } else { + xfs_dqlock(dqp); + } + + /* + * Now, find out for sure if this dquot is dirty or not. + */ + if (! XFS_DQ_IS_DIRTY(dqp)) { + xfs_dqunlock(dqp); + continue; + } + + /* XXX a sentinel would be better */ + recl = q->qi_dqreclaims; + if (!xfs_dqflock_nowait(dqp)) { + if (flags & SYNC_TRYLOCK) { + xfs_dqunlock(dqp); + continue; + } + /* + * If we can't grab the flush lock then if the caller + * really wanted us to give this our best shot, so + * see if we can give a push to the buffer before we wait + * on the flush lock. At this point, we know that + * even though the dquot is being flushed, + * it has (new) dirty data. + */ + xfs_qm_dqflock_pushbuf_wait(dqp); + } + /* + * Let go of the mplist lock. We don't want to hold it + * across a disk write + */ + mutex_unlock(&q->qi_dqlist_lock); + error = xfs_qm_dqflush(dqp, flags); + xfs_dqunlock(dqp); + if (error && XFS_FORCED_SHUTDOWN(mp)) + return 0; /* Need to prevent umount failure */ + else if (error) + return error; + + mutex_lock(&q->qi_dqlist_lock); + if (recl != q->qi_dqreclaims) { + if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS) + break; + + mutex_unlock(&q->qi_dqlist_lock); + goto again; + } + } + + mutex_unlock(&q->qi_dqlist_lock); + return 0; +} + +/* + * The hash chains and the mplist use the same xfs_dqhash structure as + * their list head, but we can take the mplist qh_lock and one of the + * hash qh_locks at the same time without any problem as they aren't + * related. + */ +static struct lock_class_key xfs_quota_mplist_class; + +/* + * This initializes all the quota information that's kept in the + * mount structure + */ +STATIC int +xfs_qm_init_quotainfo( + xfs_mount_t *mp) +{ + xfs_quotainfo_t *qinf; + int error; + xfs_dquot_t *dqp; + + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + /* + * Tell XQM that we exist as soon as possible. + */ + if ((error = xfs_qm_hold_quotafs_ref(mp))) { + return error; + } + + qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); + + /* + * See if quotainodes are setup, and if not, allocate them, + * and change the superblock accordingly. + */ + if ((error = xfs_qm_init_quotainos(mp))) { + kmem_free(qinf); + mp->m_quotainfo = NULL; + return error; + } + + INIT_LIST_HEAD(&qinf->qi_dqlist); + mutex_init(&qinf->qi_dqlist_lock); + lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class); + + qinf->qi_dqreclaims = 0; + + /* mutex used to serialize quotaoffs */ + mutex_init(&qinf->qi_quotaofflock); + + /* Precalc some constants */ + qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); + ASSERT(qinf->qi_dqchunklen); + qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen); + do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t)); + + mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD); + + /* + * We try to get the limits from the superuser's limits fields. + * This is quite hacky, but it is standard quota practice. + * We look at the USR dquot with id == 0 first, but if user quotas + * are not enabled we goto the GRP dquot with id == 0. + * We don't really care to keep separate default limits for user + * and group quotas, at least not at this point. + */ + error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0, + XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : + (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : + XFS_DQ_PROJ), + XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN, + &dqp); + if (! error) { + xfs_disk_dquot_t *ddqp = &dqp->q_core; + + /* + * The warnings and timers set the grace period given to + * a user or group before he or she can not perform any + * more writing. If it is zero, a default is used. + */ + qinf->qi_btimelimit = ddqp->d_btimer ? + be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT; + qinf->qi_itimelimit = ddqp->d_itimer ? + be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT; + qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ? + be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT; + qinf->qi_bwarnlimit = ddqp->d_bwarns ? + be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT; + qinf->qi_iwarnlimit = ddqp->d_iwarns ? + be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT; + qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ? + be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT; + qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit); + qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit); + qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit); + qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit); + qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); + qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); + + /* + * We sent the XFS_QMOPT_DQSUSER flag to dqget because + * we don't want this dquot cached. We haven't done a + * quotacheck yet, and quotacheck doesn't like incore dquots. + */ + xfs_qm_dqdestroy(dqp); + } else { + qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; + qinf->qi_itimelimit = XFS_QM_ITIMELIMIT; + qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT; + qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT; + qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT; + qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; + } + + return 0; +} + + +/* + * Gets called when unmounting a filesystem or when all quotas get + * turned off. + * This purges the quota inodes, destroys locks and frees itself. + */ +void +xfs_qm_destroy_quotainfo( + xfs_mount_t *mp) +{ + xfs_quotainfo_t *qi; + + qi = mp->m_quotainfo; + ASSERT(qi != NULL); + ASSERT(xfs_Gqm != NULL); + + /* + * Release the reference that XQM kept, so that we know + * when the XQM structure should be freed. We cannot assume + * that xfs_Gqm is non-null after this point. + */ + xfs_qm_rele_quotafs_ref(mp); + + ASSERT(list_empty(&qi->qi_dqlist)); + mutex_destroy(&qi->qi_dqlist_lock); + + if (qi->qi_uquotaip) { + IRELE(qi->qi_uquotaip); + qi->qi_uquotaip = NULL; /* paranoia */ + } + if (qi->qi_gquotaip) { + IRELE(qi->qi_gquotaip); + qi->qi_gquotaip = NULL; + } + mutex_destroy(&qi->qi_quotaofflock); + kmem_free(qi); + mp->m_quotainfo = NULL; +} + + + +/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */ + +/* ARGSUSED */ +STATIC void +xfs_qm_list_init( + xfs_dqlist_t *list, + char *str, + int n) +{ + mutex_init(&list->qh_lock); + INIT_LIST_HEAD(&list->qh_list); + list->qh_version = 0; + list->qh_nelems = 0; +} + +STATIC void +xfs_qm_list_destroy( + xfs_dqlist_t *list) +{ + mutex_destroy(&(list->qh_lock)); +} + +/* + * Create an inode and return with a reference already taken, but unlocked + * This is how we create quota inodes + */ +STATIC int +xfs_qm_qino_alloc( + xfs_mount_t *mp, + xfs_inode_t **ip, + __int64_t sbfields, + uint flags) +{ + xfs_trans_t *tp; + int error; + int committed; + + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE); + if ((error = xfs_trans_reserve(tp, + XFS_QM_QINOCREATE_SPACE_RES(mp), + XFS_CREATE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_CREATE_LOG_COUNT))) { + xfs_trans_cancel(tp, 0); + return error; + } + + error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed); + if (error) { + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | + XFS_TRANS_ABORT); + return error; + } + + /* + * Make the changes in the superblock, and log those too. + * sbfields arg may contain fields other than *QUOTINO; + * VERSIONNUM for example. + */ + spin_lock(&mp->m_sb_lock); + if (flags & XFS_QMOPT_SBVERSION) { + ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); + ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | + XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) == + (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | + XFS_SB_GQUOTINO | XFS_SB_QFLAGS)); + + xfs_sb_version_addquota(&mp->m_sb); + mp->m_sb.sb_uquotino = NULLFSINO; + mp->m_sb.sb_gquotino = NULLFSINO; + + /* qflags will get updated _after_ quotacheck */ + mp->m_sb.sb_qflags = 0; + } + if (flags & XFS_QMOPT_UQUOTA) + mp->m_sb.sb_uquotino = (*ip)->i_ino; + else + mp->m_sb.sb_gquotino = (*ip)->i_ino; + spin_unlock(&mp->m_sb_lock); + xfs_mod_sb(tp, sbfields); + + if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { + xfs_alert(mp, "%s failed (error %d)!", __func__, error); + return error; + } + return 0; +} + + +STATIC void +xfs_qm_reset_dqcounts( + xfs_mount_t *mp, + xfs_buf_t *bp, + xfs_dqid_t id, + uint type) +{ + xfs_disk_dquot_t *ddq; + int j; + + trace_xfs_reset_dqcounts(bp, _RET_IP_); + + /* + * Reset all counters and timers. They'll be + * started afresh by xfs_qm_quotacheck. + */ +#ifdef DEBUG + j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); + do_div(j, sizeof(xfs_dqblk_t)); + ASSERT(mp->m_quotainfo->qi_dqperchunk == j); +#endif + ddq = bp->b_addr; + for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) { + /* + * Do a sanity check, and if needed, repair the dqblk. Don't + * output any warnings because it's perfectly possible to + * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. + */ + (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, + "xfs_quotacheck"); + ddq->d_bcount = 0; + ddq->d_icount = 0; + ddq->d_rtbcount = 0; + ddq->d_btimer = 0; + ddq->d_itimer = 0; + ddq->d_rtbtimer = 0; + ddq->d_bwarns = 0; + ddq->d_iwarns = 0; + ddq->d_rtbwarns = 0; + ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1); + } +} + +STATIC int +xfs_qm_dqiter_bufs( + xfs_mount_t *mp, + xfs_dqid_t firstid, + xfs_fsblock_t bno, + xfs_filblks_t blkcnt, + uint flags) +{ + xfs_buf_t *bp; + int error; + int type; + + ASSERT(blkcnt > 0); + type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : + (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); + error = 0; + + /* + * Blkcnt arg can be a very big number, and might even be + * larger than the log itself. So, we have to break it up into + * manageable-sized transactions. + * Note that we don't start a permanent transaction here; we might + * not be able to get a log reservation for the whole thing up front, + * and we don't really care to either, because we just discard + * everything if we were to crash in the middle of this loop. + */ + while (blkcnt--) { + error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, bno), + mp->m_quotainfo->qi_dqchunklen, 0, &bp); + if (error) + break; + + xfs_qm_reset_dqcounts(mp, bp, firstid, type); + xfs_bdwrite(mp, bp); + /* + * goto the next block. + */ + bno++; + firstid += mp->m_quotainfo->qi_dqperchunk; + } + return error; +} + +/* + * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a + * caller supplied function for every chunk of dquots that we find. + */ +STATIC int +xfs_qm_dqiterate( + xfs_mount_t *mp, + xfs_inode_t *qip, + uint flags) +{ + xfs_bmbt_irec_t *map; + int i, nmaps; /* number of map entries */ + int error; /* return value */ + xfs_fileoff_t lblkno; + xfs_filblks_t maxlblkcnt; + xfs_dqid_t firstid; + xfs_fsblock_t rablkno; + xfs_filblks_t rablkcnt; + + error = 0; + /* + * This looks racy, but we can't keep an inode lock across a + * trans_reserve. But, this gets called during quotacheck, and that + * happens only at mount time which is single threaded. + */ + if (qip->i_d.di_nblocks == 0) + return 0; + + map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP); + + lblkno = 0; + maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); + do { + nmaps = XFS_DQITER_MAP_SIZE; + /* + * We aren't changing the inode itself. Just changing + * some of its data. No new blocks are added here, and + * the inode is never added to the transaction. + */ + xfs_ilock(qip, XFS_ILOCK_SHARED); + error = xfs_bmapi(NULL, qip, lblkno, + maxlblkcnt - lblkno, + XFS_BMAPI_METADATA, + NULL, + 0, map, &nmaps, NULL); + xfs_iunlock(qip, XFS_ILOCK_SHARED); + if (error) + break; + + ASSERT(nmaps <= XFS_DQITER_MAP_SIZE); + for (i = 0; i < nmaps; i++) { + ASSERT(map[i].br_startblock != DELAYSTARTBLOCK); + ASSERT(map[i].br_blockcount); + + + lblkno += map[i].br_blockcount; + + if (map[i].br_startblock == HOLESTARTBLOCK) + continue; + + firstid = (xfs_dqid_t) map[i].br_startoff * + mp->m_quotainfo->qi_dqperchunk; + /* + * Do a read-ahead on the next extent. + */ + if ((i+1 < nmaps) && + (map[i+1].br_startblock != HOLESTARTBLOCK)) { + rablkcnt = map[i+1].br_blockcount; + rablkno = map[i+1].br_startblock; + while (rablkcnt--) { + xfs_buf_readahead(mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, rablkno), + mp->m_quotainfo->qi_dqchunklen); + rablkno++; + } + } + /* + * Iterate thru all the blks in the extent and + * reset the counters of all the dquots inside them. + */ + if ((error = xfs_qm_dqiter_bufs(mp, + firstid, + map[i].br_startblock, + map[i].br_blockcount, + flags))) { + break; + } + } + + if (error) + break; + } while (nmaps > 0); + + kmem_free(map); + + return error; +} + +/* + * Called by dqusage_adjust in doing a quotacheck. + * + * Given the inode, and a dquot id this updates both the incore dqout as well + * as the buffer copy. This is so that once the quotacheck is done, we can + * just log all the buffers, as opposed to logging numerous updates to + * individual dquots. + */ +STATIC int +xfs_qm_quotacheck_dqadjust( + struct xfs_inode *ip, + xfs_dqid_t id, + uint type, + xfs_qcnt_t nblks, + xfs_qcnt_t rtblks) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_dquot *dqp; + int error; + + error = xfs_qm_dqget(mp, ip, id, type, + XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp); + if (error) { + /* + * Shouldn't be able to turn off quotas here. + */ + ASSERT(error != ESRCH); + ASSERT(error != ENOENT); + return error; + } + + trace_xfs_dqadjust(dqp); + + /* + * Adjust the inode count and the block count to reflect this inode's + * resource usage. + */ + be64_add_cpu(&dqp->q_core.d_icount, 1); + dqp->q_res_icount++; + if (nblks) { + be64_add_cpu(&dqp->q_core.d_bcount, nblks); + dqp->q_res_bcount += nblks; + } + if (rtblks) { + be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks); + dqp->q_res_rtbcount += rtblks; + } + + /* + * Set default limits, adjust timers (since we changed usages) + * + * There are no timers for the default values set in the root dquot. + */ + if (dqp->q_core.d_id) { + xfs_qm_adjust_dqlimits(mp, &dqp->q_core); + xfs_qm_adjust_dqtimers(mp, &dqp->q_core); + } + + dqp->dq_flags |= XFS_DQ_DIRTY; + xfs_qm_dqput(dqp); + return 0; +} + +STATIC int +xfs_qm_get_rtblks( + xfs_inode_t *ip, + xfs_qcnt_t *O_rtblks) +{ + xfs_filblks_t rtblks; /* total rt blks */ + xfs_extnum_t idx; /* extent record index */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_extnum_t nextents; /* number of extent entries */ + int error; + + ASSERT(XFS_IS_REALTIME_INODE(ip)); + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK))) + return error; + } + rtblks = 0; + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + for (idx = 0; idx < nextents; idx++) + rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx)); + *O_rtblks = (xfs_qcnt_t)rtblks; + return 0; +} + +/* + * callback routine supplied to bulkstat(). Given an inumber, find its + * dquots and update them to account for resources taken by that inode. + */ +/* ARGSUSED */ +STATIC int +xfs_qm_dqusage_adjust( + xfs_mount_t *mp, /* mount point for filesystem */ + xfs_ino_t ino, /* inode number to get data for */ + void __user *buffer, /* not used */ + int ubsize, /* not used */ + int *ubused, /* not used */ + int *res) /* result code value */ +{ + xfs_inode_t *ip; + xfs_qcnt_t nblks, rtblks = 0; + int error; + + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + /* + * rootino must have its resources accounted for, not so with the quota + * inodes. + */ + if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { + *res = BULKSTAT_RV_NOTHING; + return XFS_ERROR(EINVAL); + } + + /* + * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget + * interface expects the inode to be exclusively locked because that's + * the case in all other instances. It's OK that we do this because + * quotacheck is done only at mount time. + */ + error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); + if (error) { + *res = BULKSTAT_RV_NOTHING; + return error; + } + + ASSERT(ip->i_delayed_blks == 0); + + if (XFS_IS_REALTIME_INODE(ip)) { + /* + * Walk thru the extent list and count the realtime blocks. + */ + error = xfs_qm_get_rtblks(ip, &rtblks); + if (error) + goto error0; + } + + nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks; + + /* + * Add the (disk blocks and inode) resources occupied by this + * inode to its dquots. We do this adjustment in the incore dquot, + * and also copy the changes to its buffer. + * We don't care about putting these changes in a transaction + * envelope because if we crash in the middle of a 'quotacheck' + * we have to start from the beginning anyway. + * Once we're done, we'll log all the dquot bufs. + * + * The *QUOTA_ON checks below may look pretty racy, but quotachecks + * and quotaoffs don't race. (Quotachecks happen at mount time only). + */ + if (XFS_IS_UQUOTA_ON(mp)) { + error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid, + XFS_DQ_USER, nblks, rtblks); + if (error) + goto error0; + } + + if (XFS_IS_GQUOTA_ON(mp)) { + error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid, + XFS_DQ_GROUP, nblks, rtblks); + if (error) + goto error0; + } + + if (XFS_IS_PQUOTA_ON(mp)) { + error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip), + XFS_DQ_PROJ, nblks, rtblks); + if (error) + goto error0; + } + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + IRELE(ip); + *res = BULKSTAT_RV_DIDONE; + return 0; + +error0: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + IRELE(ip); + *res = BULKSTAT_RV_GIVEUP; + return error; +} + +/* + * Walk thru all the filesystem inodes and construct a consistent view + * of the disk quota world. If the quotacheck fails, disable quotas. + */ +int +xfs_qm_quotacheck( + xfs_mount_t *mp) +{ + int done, count, error; + xfs_ino_t lastino; + size_t structsz; + xfs_inode_t *uip, *gip; + uint flags; + + count = INT_MAX; + structsz = 1; + lastino = 0; + flags = 0; + + ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip); + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + /* + * There should be no cached dquots. The (simplistic) quotacheck + * algorithm doesn't like that. + */ + ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist)); + + xfs_notice(mp, "Quotacheck needed: Please wait."); + + /* + * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset + * their counters to zero. We need a clean slate. + * We don't log our changes till later. + */ + uip = mp->m_quotainfo->qi_uquotaip; + if (uip) { + error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA); + if (error) + goto error_return; + flags |= XFS_UQUOTA_CHKD; + } + + gip = mp->m_quotainfo->qi_gquotaip; + if (gip) { + error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? + XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); + if (error) + goto error_return; + flags |= XFS_OQUOTA_CHKD; + } + + do { + /* + * Iterate thru all the inodes in the file system, + * adjusting the corresponding dquot counters in core. + */ + error = xfs_bulkstat(mp, &lastino, &count, + xfs_qm_dqusage_adjust, + structsz, NULL, &done); + if (error) + break; + + } while (!done); + + /* + * We've made all the changes that we need to make incore. + * Flush them down to disk buffers if everything was updated + * successfully. + */ + if (!error) + error = xfs_qm_dqflush_all(mp, 0); + + /* + * We can get this error if we couldn't do a dquot allocation inside + * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the + * dirty dquots that might be cached, we just want to get rid of them + * and turn quotaoff. The dquots won't be attached to any of the inodes + * at this point (because we intentionally didn't in dqget_noattach). + */ + if (error) { + xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL); + goto error_return; + } + + /* + * We didn't log anything, because if we crashed, we'll have to + * start the quotacheck from scratch anyway. However, we must make + * sure that our dquot changes are secure before we put the + * quotacheck'd stamp on the superblock. So, here we do a synchronous + * flush. + */ + XFS_bflush(mp->m_ddev_targp); + + /* + * If one type of quotas is off, then it will lose its + * quotachecked status, since we won't be doing accounting for + * that type anymore. + */ + mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); + mp->m_qflags |= flags; + + error_return: + if (error) { + xfs_warn(mp, + "Quotacheck: Unsuccessful (Error %d): Disabling quotas.", + error); + /* + * We must turn off quotas. + */ + ASSERT(mp->m_quotainfo != NULL); + ASSERT(xfs_Gqm != NULL); + xfs_qm_destroy_quotainfo(mp); + if (xfs_mount_reset_sbqflags(mp)) { + xfs_warn(mp, + "Quotacheck: Failed to reset quota flags."); + } + } else + xfs_notice(mp, "Quotacheck: Done."); + return (error); +} + +/* + * This is called after the superblock has been read in and we're ready to + * iget the quota inodes. + */ +STATIC int +xfs_qm_init_quotainos( + xfs_mount_t *mp) +{ + xfs_inode_t *uip, *gip; + int error; + __int64_t sbflags; + uint flags; + + ASSERT(mp->m_quotainfo); + uip = gip = NULL; + sbflags = 0; + flags = 0; + + /* + * Get the uquota and gquota inodes + */ + if (xfs_sb_version_hasquota(&mp->m_sb)) { + if (XFS_IS_UQUOTA_ON(mp) && + mp->m_sb.sb_uquotino != NULLFSINO) { + ASSERT(mp->m_sb.sb_uquotino > 0); + if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, + 0, 0, &uip))) + return XFS_ERROR(error); + } + if (XFS_IS_OQUOTA_ON(mp) && + mp->m_sb.sb_gquotino != NULLFSINO) { + ASSERT(mp->m_sb.sb_gquotino > 0); + if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, + 0, 0, &gip))) { + if (uip) + IRELE(uip); + return XFS_ERROR(error); + } + } + } else { + flags |= XFS_QMOPT_SBVERSION; + sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | + XFS_SB_GQUOTINO | XFS_SB_QFLAGS); + } + + /* + * Create the two inodes, if they don't exist already. The changes + * made above will get added to a transaction and logged in one of + * the qino_alloc calls below. If the device is readonly, + * temporarily switch to read-write to do this. + */ + if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) { + if ((error = xfs_qm_qino_alloc(mp, &uip, + sbflags | XFS_SB_UQUOTINO, + flags | XFS_QMOPT_UQUOTA))) + return XFS_ERROR(error); + + flags &= ~XFS_QMOPT_SBVERSION; + } + if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) { + flags |= (XFS_IS_GQUOTA_ON(mp) ? + XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); + error = xfs_qm_qino_alloc(mp, &gip, + sbflags | XFS_SB_GQUOTINO, flags); + if (error) { + if (uip) + IRELE(uip); + + return XFS_ERROR(error); + } + } + + mp->m_quotainfo->qi_uquotaip = uip; + mp->m_quotainfo->qi_gquotaip = gip; + + return 0; +} + + + +/* + * Just pop the least recently used dquot off the freelist and + * recycle it. The returned dquot is locked. + */ +STATIC xfs_dquot_t * +xfs_qm_dqreclaim_one(void) +{ + xfs_dquot_t *dqpout; + xfs_dquot_t *dqp; + int restarts; + int startagain; + + restarts = 0; + dqpout = NULL; + + /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ +again: + startagain = 0; + mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); + + list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { + struct xfs_mount *mp = dqp->q_mount; + xfs_dqlock(dqp); + + /* + * We are racing with dqlookup here. Naturally we don't + * want to reclaim a dquot that lookup wants. We release the + * freelist lock and start over, so that lookup will grab + * both the dquot and the freelistlock. + */ + if (dqp->dq_flags & XFS_DQ_WANT) { + ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); + + trace_xfs_dqreclaim_want(dqp); + XQM_STATS_INC(xqmstats.xs_qm_dqwants); + restarts++; + startagain = 1; + goto dqunlock; + } + + /* + * If the dquot is inactive, we are assured that it is + * not on the mplist or the hashlist, and that makes our + * life easier. + */ + if (dqp->dq_flags & XFS_DQ_INACTIVE) { + ASSERT(mp == NULL); + ASSERT(! XFS_DQ_IS_DIRTY(dqp)); + ASSERT(list_empty(&dqp->q_hashlist)); + ASSERT(list_empty(&dqp->q_mplist)); + list_del_init(&dqp->q_freelist); + xfs_Gqm->qm_dqfrlist_cnt--; + dqpout = dqp; + XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); + goto dqunlock; + } + + ASSERT(dqp->q_hash); + ASSERT(!list_empty(&dqp->q_mplist)); + + /* + * Try to grab the flush lock. If this dquot is in the process + * of getting flushed to disk, we don't want to reclaim it. + */ + if (!xfs_dqflock_nowait(dqp)) + goto dqunlock; + + /* + * We have the flush lock so we know that this is not in the + * process of being flushed. So, if this is dirty, flush it + * DELWRI so that we don't get a freelist infested with + * dirty dquots. + */ + if (XFS_DQ_IS_DIRTY(dqp)) { + int error; + + trace_xfs_dqreclaim_dirty(dqp); + + /* + * We flush it delayed write, so don't bother + * releasing the freelist lock. + */ + error = xfs_qm_dqflush(dqp, 0); + if (error) { + xfs_warn(mp, "%s: dquot %p flush failed", + __func__, dqp); + } + goto dqunlock; + } + + /* + * We're trying to get the hashlock out of order. This races + * with dqlookup; so, we giveup and goto the next dquot if + * we couldn't get the hashlock. This way, we won't starve + * a dqlookup process that holds the hashlock that is + * waiting for the freelist lock. + */ + if (!mutex_trylock(&dqp->q_hash->qh_lock)) { + restarts++; + goto dqfunlock; + } + + /* + * This races with dquot allocation code as well as dqflush_all + * and reclaim code. So, if we failed to grab the mplist lock, + * giveup everything and start over. + */ + if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { + restarts++; + startagain = 1; + goto qhunlock; + } + + ASSERT(dqp->q_nrefs == 0); + list_del_init(&dqp->q_mplist); + mp->m_quotainfo->qi_dquots--; + mp->m_quotainfo->qi_dqreclaims++; + list_del_init(&dqp->q_hashlist); + dqp->q_hash->qh_version++; + list_del_init(&dqp->q_freelist); + xfs_Gqm->qm_dqfrlist_cnt--; + dqpout = dqp; + mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); +qhunlock: + mutex_unlock(&dqp->q_hash->qh_lock); +dqfunlock: + xfs_dqfunlock(dqp); +dqunlock: + xfs_dqunlock(dqp); + if (dqpout) + break; + if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) + break; + if (startagain) { + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); + goto again; + } + } + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); + return dqpout; +} + +/* + * Traverse the freelist of dquots and attempt to reclaim a maximum of + * 'howmany' dquots. This operation races with dqlookup(), and attempts to + * favor the lookup function ... + */ +STATIC int +xfs_qm_shake_freelist( + int howmany) +{ + int nreclaimed = 0; + xfs_dquot_t *dqp; + + if (howmany <= 0) + return 0; + + while (nreclaimed < howmany) { + dqp = xfs_qm_dqreclaim_one(); + if (!dqp) + return nreclaimed; + xfs_qm_dqdestroy(dqp); + nreclaimed++; + } + return nreclaimed; +} + +/* + * The kmem_shake interface is invoked when memory is running low. + */ +/* ARGSUSED */ +STATIC int +xfs_qm_shake( + struct shrinker *shrink, + struct shrink_control *sc) +{ + int ndqused, nfree, n; + gfp_t gfp_mask = sc->gfp_mask; + + if (!kmem_shake_allow(gfp_mask)) + return 0; + if (!xfs_Gqm) + return 0; + + nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */ + /* incore dquots in all f/s's */ + ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree; + + ASSERT(ndqused >= 0); + + if (nfree <= ndqused && nfree < ndquot) + return 0; + + ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */ + n = nfree - ndqused - ndquot; /* # over target */ + + return xfs_qm_shake_freelist(MAX(nfree, n)); +} + + +/*------------------------------------------------------------------*/ + +/* + * Return a new incore dquot. Depending on the number of + * dquots in the system, we either allocate a new one on the kernel heap, + * or reclaim a free one. + * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed + * to reclaim an existing one from the freelist. + */ +boolean_t +xfs_qm_dqalloc_incore( + xfs_dquot_t **O_dqpp) +{ + xfs_dquot_t *dqp; + + /* + * Check against high water mark to see if we want to pop + * a nincompoop dquot off the freelist. + */ + if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) { + /* + * Try to recycle a dquot from the freelist. + */ + if ((dqp = xfs_qm_dqreclaim_one())) { + XQM_STATS_INC(xqmstats.xs_qm_dqreclaims); + /* + * Just zero the core here. The rest will get + * reinitialized by caller. XXX we shouldn't even + * do this zero ... + */ + memset(&dqp->q_core, 0, sizeof(dqp->q_core)); + *O_dqpp = dqp; + return B_FALSE; + } + XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses); + } + + /* + * Allocate a brand new dquot on the kernel heap and return it + * to the caller to initialize. + */ + ASSERT(xfs_Gqm->qm_dqzone != NULL); + *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); + atomic_inc(&xfs_Gqm->qm_totaldquots); + + return B_TRUE; +} + + +/* + * Start a transaction and write the incore superblock changes to + * disk. flags parameter indicates which fields have changed. + */ +int +xfs_qm_write_sb_changes( + xfs_mount_t *mp, + __int64_t flags) +{ + xfs_trans_t *tp; + int error; + + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); + if ((error = xfs_trans_reserve(tp, 0, + mp->m_sb.sb_sectsize + 128, 0, + 0, + XFS_DEFAULT_LOG_COUNT))) { + xfs_trans_cancel(tp, 0); + return error; + } + + xfs_mod_sb(tp, flags); + error = xfs_trans_commit(tp, 0); + + return error; +} + + +/* --------------- utility functions for vnodeops ---------------- */ + + +/* + * Given an inode, a uid, gid and prid make sure that we have + * allocated relevant dquot(s) on disk, and that we won't exceed inode + * quotas by creating this file. + * This also attaches dquot(s) to the given inode after locking it, + * and returns the dquots corresponding to the uid and/or gid. + * + * in : inode (unlocked) + * out : udquot, gdquot with references taken and unlocked + */ +int +xfs_qm_vop_dqalloc( + struct xfs_inode *ip, + uid_t uid, + gid_t gid, + prid_t prid, + uint flags, + struct xfs_dquot **O_udqpp, + struct xfs_dquot **O_gdqpp) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_dquot *uq, *gq; + int error; + uint lockflags; + + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + return 0; + + lockflags = XFS_ILOCK_EXCL; + xfs_ilock(ip, lockflags); + + if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip)) + gid = ip->i_d.di_gid; + + /* + * Attach the dquot(s) to this inode, doing a dquot allocation + * if necessary. The dquot(s) will not be locked. + */ + if (XFS_NOT_DQATTACHED(mp, ip)) { + error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC); + if (error) { + xfs_iunlock(ip, lockflags); + return error; + } + } + + uq = gq = NULL; + if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) { + if (ip->i_d.di_uid != uid) { + /* + * What we need is the dquot that has this uid, and + * if we send the inode to dqget, the uid of the inode + * takes priority over what's sent in the uid argument. + * We must unlock inode here before calling dqget if + * we're not sending the inode, because otherwise + * we'll deadlock by doing trans_reserve while + * holding ilock. + */ + xfs_iunlock(ip, lockflags); + if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid, + XFS_DQ_USER, + XFS_QMOPT_DQALLOC | + XFS_QMOPT_DOWARN, + &uq))) { + ASSERT(error != ENOENT); + return error; + } + /* + * Get the ilock in the right order. + */ + xfs_dqunlock(uq); + lockflags = XFS_ILOCK_SHARED; + xfs_ilock(ip, lockflags); + } else { + /* + * Take an extra reference, because we'll return + * this to caller + */ + ASSERT(ip->i_udquot); + uq = ip->i_udquot; + xfs_dqlock(uq); + XFS_DQHOLD(uq); + xfs_dqunlock(uq); + } + } + if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { + if (ip->i_d.di_gid != gid) { + xfs_iunlock(ip, lockflags); + if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid, + XFS_DQ_GROUP, + XFS_QMOPT_DQALLOC | + XFS_QMOPT_DOWARN, + &gq))) { + if (uq) + xfs_qm_dqrele(uq); + ASSERT(error != ENOENT); + return error; + } + xfs_dqunlock(gq); + lockflags = XFS_ILOCK_SHARED; + xfs_ilock(ip, lockflags); + } else { + ASSERT(ip->i_gdquot); + gq = ip->i_gdquot; + xfs_dqlock(gq); + XFS_DQHOLD(gq); + xfs_dqunlock(gq); + } + } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { + if (xfs_get_projid(ip) != prid) { + xfs_iunlock(ip, lockflags); + if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, + XFS_DQ_PROJ, + XFS_QMOPT_DQALLOC | + XFS_QMOPT_DOWARN, + &gq))) { + if (uq) + xfs_qm_dqrele(uq); + ASSERT(error != ENOENT); + return (error); + } + xfs_dqunlock(gq); + lockflags = XFS_ILOCK_SHARED; + xfs_ilock(ip, lockflags); + } else { + ASSERT(ip->i_gdquot); + gq = ip->i_gdquot; + xfs_dqlock(gq); + XFS_DQHOLD(gq); + xfs_dqunlock(gq); + } + } + if (uq) + trace_xfs_dquot_dqalloc(ip); + + xfs_iunlock(ip, lockflags); + if (O_udqpp) + *O_udqpp = uq; + else if (uq) + xfs_qm_dqrele(uq); + if (O_gdqpp) + *O_gdqpp = gq; + else if (gq) + xfs_qm_dqrele(gq); + return 0; +} + +/* + * Actually transfer ownership, and do dquot modifications. + * These were already reserved. + */ +xfs_dquot_t * +xfs_qm_vop_chown( + xfs_trans_t *tp, + xfs_inode_t *ip, + xfs_dquot_t **IO_olddq, + xfs_dquot_t *newdq) +{ + xfs_dquot_t *prevdq; + uint bfield = XFS_IS_REALTIME_INODE(ip) ? + XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; + + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); + + /* old dquot */ + prevdq = *IO_olddq; + ASSERT(prevdq); + ASSERT(prevdq != newdq); + + xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks)); + xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1); + + /* the sparkling new dquot */ + xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks); + xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1); + + /* + * Take an extra reference, because the inode + * is going to keep this dquot pointer even + * after the trans_commit. + */ + xfs_dqlock(newdq); + XFS_DQHOLD(newdq); + xfs_dqunlock(newdq); + *IO_olddq = newdq; + + return prevdq; +} + +/* + * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID). + */ +int +xfs_qm_vop_chown_reserve( + xfs_trans_t *tp, + xfs_inode_t *ip, + xfs_dquot_t *udqp, + xfs_dquot_t *gdqp, + uint flags) +{ + xfs_mount_t *mp = ip->i_mount; + uint delblks, blkflags, prjflags = 0; + xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; + int error; + + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + delblks = ip->i_delayed_blks; + delblksudq = delblksgdq = unresudq = unresgdq = NULL; + blkflags = XFS_IS_REALTIME_INODE(ip) ? + XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS; + + if (XFS_IS_UQUOTA_ON(mp) && udqp && + ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) { + delblksudq = udqp; + /* + * If there are delayed allocation blocks, then we have to + * unreserve those from the old dquot, and add them to the + * new dquot. + */ + if (delblks) { + ASSERT(ip->i_udquot); + unresudq = ip->i_udquot; + } + } + if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { + if (XFS_IS_PQUOTA_ON(ip->i_mount) && + xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id)) + prjflags = XFS_QMOPT_ENOSPC; + + if (prjflags || + (XFS_IS_GQUOTA_ON(ip->i_mount) && + ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) { + delblksgdq = gdqp; + if (delblks) { + ASSERT(ip->i_gdquot); + unresgdq = ip->i_gdquot; + } + } + } + + if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, + delblksudq, delblksgdq, ip->i_d.di_nblocks, 1, + flags | blkflags | prjflags))) + return (error); + + /* + * Do the delayed blks reservations/unreservations now. Since, these + * are done without the help of a transaction, if a reservation fails + * its previous reservations won't be automatically undone by trans + * code. So, we have to do it manually here. + */ + if (delblks) { + /* + * Do the reservations first. Unreservation can't fail. + */ + ASSERT(delblksudq || delblksgdq); + ASSERT(unresudq || unresgdq); + if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, + delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0, + flags | blkflags | prjflags))) + return (error); + xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, + unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0, + blkflags); + } + + return (0); +} + +int +xfs_qm_vop_rename_dqattach( + struct xfs_inode **i_tab) +{ + struct xfs_mount *mp = i_tab[0]->i_mount; + int i; + + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + return 0; + + for (i = 0; (i < 4 && i_tab[i]); i++) { + struct xfs_inode *ip = i_tab[i]; + int error; + + /* + * Watch out for duplicate entries in the table. + */ + if (i == 0 || ip != i_tab[i-1]) { + if (XFS_NOT_DQATTACHED(mp, ip)) { + error = xfs_qm_dqattach(ip, 0); + if (error) + return error; + } + } + } + return 0; +} + +void +xfs_qm_vop_create_dqattach( + struct xfs_trans *tp, + struct xfs_inode *ip, + struct xfs_dquot *udqp, + struct xfs_dquot *gdqp) +{ + struct xfs_mount *mp = tp->t_mountp; + + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + return; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + if (udqp) { + xfs_dqlock(udqp); + XFS_DQHOLD(udqp); + xfs_dqunlock(udqp); + ASSERT(ip->i_udquot == NULL); + ip->i_udquot = udqp; + ASSERT(XFS_IS_UQUOTA_ON(mp)); + ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); + xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); + } + if (gdqp) { + xfs_dqlock(gdqp); + XFS_DQHOLD(gdqp); + xfs_dqunlock(gdqp); + ASSERT(ip->i_gdquot == NULL); + ip->i_gdquot = gdqp; + ASSERT(XFS_IS_OQUOTA_ON(mp)); + ASSERT((XFS_IS_GQUOTA_ON(mp) ? + ip->i_d.di_gid : xfs_get_projid(ip)) == + be32_to_cpu(gdqp->q_core.d_id)); + xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); + } +} + diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h new file mode 100644 index 0000000..43b9abe --- /dev/null +++ b/fs/xfs/xfs_qm.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_QM_H__ +#define __XFS_QM_H__ + +#include "xfs_dquot_item.h" +#include "xfs_dquot.h" +#include "xfs_quota_priv.h" +#include "xfs_qm_stats.h" + +struct xfs_qm; +struct xfs_inode; + +extern uint ndquot; +extern struct mutex xfs_Gqm_lock; +extern struct xfs_qm *xfs_Gqm; +extern kmem_zone_t *qm_dqzone; +extern kmem_zone_t *qm_dqtrxzone; + +/* + * Used in xfs_qm_sync called by xfs_sync to count the max times that it can + * iterate over the mountpt's dquot list in one call. + */ +#define XFS_QM_SYNC_MAX_RESTARTS 7 + +/* + * Ditto, for xfs_qm_dqreclaim_one. + */ +#define XFS_QM_RECLAIM_MAX_RESTARTS 4 + +/* + * Ideal ratio of free to in use dquots. Quota manager makes an attempt + * to keep this balance. + */ +#define XFS_QM_DQFREE_RATIO 2 + +/* + * Dquot hashtable constants/threshold values. + */ +#define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t)) +#define XFS_QM_HASHSIZE_HIGH ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t)) + +/* + * This defines the unit of allocation of dquots. + * Currently, it is just one file system block, and a 4K blk contains 30 + * (136 * 30 = 4080) dquots. It's probably not worth trying to make + * this more dynamic. + * XXXsup However, if this number is changed, we have to make sure that we don't + * implicitly assume that we do allocations in chunks of a single filesystem + * block in the dquot/xqm code. + */ +#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 + +typedef xfs_dqhash_t xfs_dqlist_t; + +/* + * Quota Manager (global) structure. Lives only in core. + */ +typedef struct xfs_qm { + xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */ + xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */ + uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */ + struct list_head qm_dqfrlist; /* freelist of dquots */ + struct mutex qm_dqfrlist_lock; + int qm_dqfrlist_cnt; + atomic_t qm_totaldquots; /* total incore dquots */ + uint qm_nrefs; /* file systems with quota on */ + int qm_dqfree_ratio;/* ratio of free to inuse dquots */ + kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */ + kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */ +} xfs_qm_t; + +/* + * Various quota information for individual filesystems. + * The mount structure keeps a pointer to this. + */ +typedef struct xfs_quotainfo { + xfs_inode_t *qi_uquotaip; /* user quota inode */ + xfs_inode_t *qi_gquotaip; /* group quota inode */ + struct list_head qi_dqlist; /* all dquots in filesys */ + struct mutex qi_dqlist_lock; + int qi_dquots; + int qi_dqreclaims; /* a change here indicates + a removal in the dqlist */ + time_t qi_btimelimit; /* limit for blks timer */ + time_t qi_itimelimit; /* limit for inodes timer */ + time_t qi_rtbtimelimit;/* limit for rt blks timer */ + xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */ + xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */ + xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */ + struct mutex qi_quotaofflock;/* to serialize quotaoff */ + xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */ + uint qi_dqperchunk; /* # ondisk dqs in above chunk */ + xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */ + xfs_qcnt_t qi_bsoftlimit; /* default data blk soft limit */ + xfs_qcnt_t qi_ihardlimit; /* default inode count hard limit */ + xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */ + xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */ + xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */ +} xfs_quotainfo_t; + + +extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); +extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, + xfs_dquot_t *, xfs_dquot_t *, long, long, uint); +extern void xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *); +extern void xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *); + +/* + * We keep the usr and grp dquots separately so that locking will be easier + * to do at commit time. All transactions that we know of at this point + * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value. + */ +#define XFS_QM_TRANS_MAXDQS 2 +typedef struct xfs_dquot_acct { + xfs_dqtrx_t dqa_usrdquots[XFS_QM_TRANS_MAXDQS]; + xfs_dqtrx_t dqa_grpdquots[XFS_QM_TRANS_MAXDQS]; +} xfs_dquot_acct_t; + +/* + * Users are allowed to have a usage exceeding their softlimit for + * a period this long. + */ +#define XFS_QM_BTIMELIMIT (7 * 24*60*60) /* 1 week */ +#define XFS_QM_RTBTIMELIMIT (7 * 24*60*60) /* 1 week */ +#define XFS_QM_ITIMELIMIT (7 * 24*60*60) /* 1 week */ + +#define XFS_QM_BWARNLIMIT 5 +#define XFS_QM_IWARNLIMIT 5 +#define XFS_QM_RTBWARNLIMIT 5 + +extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); +extern int xfs_qm_quotacheck(xfs_mount_t *); +extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); + +/* dquot stuff */ +extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **); +extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); +extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); + +/* quota ops */ +extern int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint); +extern int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint, + fs_disk_quota_t *); +extern int xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint, + fs_disk_quota_t *); +extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); +extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); +extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); + +#endif /* __XFS_QM_H__ */ diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c new file mode 100644 index 0000000..a0a829a --- /dev/null +++ b/fs/xfs/xfs_qm_bhv.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_itable.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_qm.h" + + +STATIC void +xfs_fill_statvfs_from_dquot( + struct kstatfs *statp, + xfs_disk_dquot_t *dp) +{ + __uint64_t limit; + + limit = dp->d_blk_softlimit ? + be64_to_cpu(dp->d_blk_softlimit) : + be64_to_cpu(dp->d_blk_hardlimit); + if (limit && statp->f_blocks > limit) { + statp->f_blocks = limit; + statp->f_bfree = statp->f_bavail = + (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ? + (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0; + } + + limit = dp->d_ino_softlimit ? + be64_to_cpu(dp->d_ino_softlimit) : + be64_to_cpu(dp->d_ino_hardlimit); + if (limit && statp->f_files > limit) { + statp->f_files = limit; + statp->f_ffree = + (statp->f_files > be64_to_cpu(dp->d_icount)) ? + (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0; + } +} + + +/* + * Directory tree accounting is implemented using project quotas, where + * the project identifier is inherited from parent directories. + * A statvfs (df, etc.) of a directory that is using project quota should + * return a statvfs of the project, not the entire filesystem. + * This makes such trees appear as if they are filesystems in themselves. + */ +void +xfs_qm_statvfs( + xfs_inode_t *ip, + struct kstatfs *statp) +{ + xfs_mount_t *mp = ip->i_mount; + xfs_dquot_t *dqp; + + if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) { + xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); + xfs_qm_dqput(dqp); + } +} + +int +xfs_qm_newmount( + xfs_mount_t *mp, + uint *needquotamount, + uint *quotaflags) +{ + uint quotaondisk; + uint uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0; + + quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) && + (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT); + + if (quotaondisk) { + uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT; + pquotaondisk = mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT; + gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT; + } + + /* + * If the device itself is read-only, we can't allow + * the user to change the state of quota on the mount - + * this would generate a transaction on the ro device, + * which would lead to an I/O error and shutdown + */ + + if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) || + (!uquotaondisk && XFS_IS_UQUOTA_ON(mp)) || + (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) || + (!pquotaondisk && XFS_IS_PQUOTA_ON(mp)) || + (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || + (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) && + xfs_dev_is_read_only(mp, "changing quota state")) { + xfs_warn(mp, "please mount with%s%s%s%s.", + (!quotaondisk ? "out quota" : ""), + (uquotaondisk ? " usrquota" : ""), + (pquotaondisk ? " prjquota" : ""), + (gquotaondisk ? " grpquota" : "")); + return XFS_ERROR(EPERM); + } + + if (XFS_IS_QUOTA_ON(mp) || quotaondisk) { + /* + * Call mount_quotas at this point only if we won't have to do + * a quotacheck. + */ + if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) { + /* + * If an error occurred, qm_mount_quotas code + * has already disabled quotas. So, just finish + * mounting, and get on with the boring life + * without disk quotas. + */ + xfs_qm_mount_quotas(mp); + } else { + /* + * Clear the quota flags, but remember them. This + * is so that the quota code doesn't get invoked + * before we're ready. This can happen when an + * inode goes inactive and wants to free blocks, + * or via xfs_log_mount_finish. + */ + *needquotamount = B_TRUE; + *quotaflags = mp->m_qflags; + mp->m_qflags = 0; + } + } + + return 0; +} + +void __init +xfs_qm_init(void) +{ + printk(KERN_INFO "SGI XFS Quota Management subsystem\n"); + mutex_init(&xfs_Gqm_lock); + xfs_qm_init_procfs(); +} + +void __exit +xfs_qm_exit(void) +{ + xfs_qm_cleanup_procfs(); + if (qm_dqzone) + kmem_zone_destroy(qm_dqzone); + if (qm_dqtrxzone) + kmem_zone_destroy(qm_dqtrxzone); +} diff --git a/fs/xfs/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c new file mode 100644 index 0000000..8671a0b --- /dev/null +++ b/fs/xfs/xfs_qm_stats.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_itable.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_qm.h" + +struct xqmstats xqmstats; + +static int xqm_proc_show(struct seq_file *m, void *v) +{ + /* maximum; incore; ratio free to inuse; freelist */ + seq_printf(m, "%d\t%d\t%d\t%u\n", + ndquot, + xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, + xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, + xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0); + return 0; +} + +static int xqm_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, xqm_proc_show, NULL); +} + +static const struct file_operations xqm_proc_fops = { + .owner = THIS_MODULE, + .open = xqm_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int xqmstat_proc_show(struct seq_file *m, void *v) +{ + /* quota performance statistics */ + seq_printf(m, "qm %u %u %u %u %u %u %u %u\n", + xqmstats.xs_qm_dqreclaims, + xqmstats.xs_qm_dqreclaim_misses, + xqmstats.xs_qm_dquot_dups, + xqmstats.xs_qm_dqcachemisses, + xqmstats.xs_qm_dqcachehits, + xqmstats.xs_qm_dqwants, + xqmstats.xs_qm_dqshake_reclaims, + xqmstats.xs_qm_dqinact_reclaims); + return 0; +} + +static int xqmstat_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, xqmstat_proc_show, NULL); +} + +static const struct file_operations xqmstat_proc_fops = { + .owner = THIS_MODULE, + .open = xqmstat_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void +xfs_qm_init_procfs(void) +{ + proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops); + proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops); +} + +void +xfs_qm_cleanup_procfs(void) +{ + remove_proc_entry("fs/xfs/xqm", NULL); + remove_proc_entry("fs/xfs/xqmstat", NULL); +} diff --git a/fs/xfs/xfs_qm_stats.h b/fs/xfs/xfs_qm_stats.h new file mode 100644 index 0000000..5b964fc --- /dev/null +++ b/fs/xfs/xfs_qm_stats.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2002 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_QM_STATS_H__ +#define __XFS_QM_STATS_H__ + +#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF) + +/* + * XQM global statistics + */ +struct xqmstats { + __uint32_t xs_qm_dqreclaims; + __uint32_t xs_qm_dqreclaim_misses; + __uint32_t xs_qm_dquot_dups; + __uint32_t xs_qm_dqcachemisses; + __uint32_t xs_qm_dqcachehits; + __uint32_t xs_qm_dqwants; + __uint32_t xs_qm_dqshake_reclaims; + __uint32_t xs_qm_dqinact_reclaims; +}; + +extern struct xqmstats xqmstats; + +# define XQM_STATS_INC(count) ( (count)++ ) + +extern void xfs_qm_init_procfs(void); +extern void xfs_qm_cleanup_procfs(void); + +#else + +# define XQM_STATS_INC(count) do { } while (0) + +static inline void xfs_qm_init_procfs(void) { }; +static inline void xfs_qm_cleanup_procfs(void) { }; + +#endif + +#endif /* __XFS_QM_STATS_H__ */ diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c new file mode 100644 index 0000000..609246f --- /dev/null +++ b/fs/xfs/xfs_qm_syscalls.c @@ -0,0 +1,906 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_itable.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_utils.h" +#include "xfs_qm.h" +#include "xfs_trace.h" + +STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); +STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, + uint); +STATIC uint xfs_qm_export_flags(uint); +STATIC uint xfs_qm_export_qtype_flags(uint); +STATIC void xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *, + fs_disk_quota_t *); + + +/* + * Turn off quota accounting and/or enforcement for all udquots and/or + * gdquots. Called only at unmount time. + * + * This assumes that there are no dquots of this file system cached + * incore, and modifies the ondisk dquot directly. Therefore, for example, + * it is an error to call this twice, without purging the cache. + */ +int +xfs_qm_scall_quotaoff( + xfs_mount_t *mp, + uint flags) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + uint dqtype; + int error; + uint inactivate_flags; + xfs_qoff_logitem_t *qoffstart; + int nculprits; + + /* + * No file system can have quotas enabled on disk but not in core. + * Note that quota utilities (like quotaoff) _expect_ + * errno == EEXIST here. + */ + if ((mp->m_qflags & flags) == 0) + return XFS_ERROR(EEXIST); + error = 0; + + flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); + + /* + * We don't want to deal with two quotaoffs messing up each other, + * so we're going to serialize it. quotaoff isn't exactly a performance + * critical thing. + * If quotaoff, then we must be dealing with the root filesystem. + */ + ASSERT(q); + mutex_lock(&q->qi_quotaofflock); + + /* + * If we're just turning off quota enforcement, change mp and go. + */ + if ((flags & XFS_ALL_QUOTA_ACCT) == 0) { + mp->m_qflags &= ~(flags); + + spin_lock(&mp->m_sb_lock); + mp->m_sb.sb_qflags = mp->m_qflags; + spin_unlock(&mp->m_sb_lock); + mutex_unlock(&q->qi_quotaofflock); + + /* XXX what to do if error ? Revert back to old vals incore ? */ + error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS); + return (error); + } + + dqtype = 0; + inactivate_flags = 0; + /* + * If accounting is off, we must turn enforcement off, clear the + * quota 'CHKD' certificate to make it known that we have to + * do a quotacheck the next time this quota is turned on. + */ + if (flags & XFS_UQUOTA_ACCT) { + dqtype |= XFS_QMOPT_UQUOTA; + flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD); + inactivate_flags |= XFS_UQUOTA_ACTIVE; + } + if (flags & XFS_GQUOTA_ACCT) { + dqtype |= XFS_QMOPT_GQUOTA; + flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); + inactivate_flags |= XFS_GQUOTA_ACTIVE; + } else if (flags & XFS_PQUOTA_ACCT) { + dqtype |= XFS_QMOPT_PQUOTA; + flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); + inactivate_flags |= XFS_PQUOTA_ACTIVE; + } + + /* + * Nothing to do? Don't complain. This happens when we're just + * turning off quota enforcement. + */ + if ((mp->m_qflags & flags) == 0) + goto out_unlock; + + /* + * Write the LI_QUOTAOFF log record, and do SB changes atomically, + * and synchronously. If we fail to write, we should abort the + * operation as it cannot be recovered safely if we crash. + */ + error = xfs_qm_log_quotaoff(mp, &qoffstart, flags); + if (error) + goto out_unlock; + + /* + * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct + * to take care of the race between dqget and quotaoff. We don't take + * any special locks to reset these bits. All processes need to check + * these bits *after* taking inode lock(s) to see if the particular + * quota type is in the process of being turned off. If *ACTIVE, it is + * guaranteed that all dquot structures and all quotainode ptrs will all + * stay valid as long as that inode is kept locked. + * + * There is no turning back after this. + */ + mp->m_qflags &= ~inactivate_flags; + + /* + * Give back all the dquot reference(s) held by inodes. + * Here we go thru every single incore inode in this file system, and + * do a dqrele on the i_udquot/i_gdquot that it may have. + * Essentially, as long as somebody has an inode locked, this guarantees + * that quotas will not be turned off. This is handy because in a + * transaction once we lock the inode(s) and check for quotaon, we can + * depend on the quota inodes (and other things) being valid as long as + * we keep the lock(s). + */ + xfs_qm_dqrele_all_inodes(mp, flags); + + /* + * Next we make the changes in the quota flag in the mount struct. + * This isn't protected by a particular lock directly, because we + * don't want to take a mrlock every time we depend on quotas being on. + */ + mp->m_qflags &= ~(flags); + + /* + * Go through all the dquots of this file system and purge them, + * according to what was turned off. We may not be able to get rid + * of all dquots, because dquots can have temporary references that + * are not attached to inodes. eg. xfs_setattr, xfs_create. + * So, if we couldn't purge all the dquots from the filesystem, + * we can't get rid of the incore data structures. + */ + while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype))) + delay(10 * nculprits); + + /* + * Transactions that had started before ACTIVE state bit was cleared + * could have logged many dquots, so they'd have higher LSNs than + * the first QUOTAOFF log record does. If we happen to crash when + * the tail of the log has gone past the QUOTAOFF record, but + * before the last dquot modification, those dquots __will__ + * recover, and that's not good. + * + * So, we have QUOTAOFF start and end logitems; the start + * logitem won't get overwritten until the end logitem appears... + */ + error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags); + if (error) { + /* We're screwed now. Shutdown is the only option. */ + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + goto out_unlock; + } + + /* + * If quotas is completely disabled, close shop. + */ + if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) || + ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) { + mutex_unlock(&q->qi_quotaofflock); + xfs_qm_destroy_quotainfo(mp); + return (0); + } + + /* + * Release our quotainode references if we don't need them anymore. + */ + if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) { + IRELE(q->qi_uquotaip); + q->qi_uquotaip = NULL; + } + if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) { + IRELE(q->qi_gquotaip); + q->qi_gquotaip = NULL; + } + +out_unlock: + mutex_unlock(&q->qi_quotaofflock); + return error; +} + +STATIC int +xfs_qm_scall_trunc_qfile( + struct xfs_mount *mp, + xfs_ino_t ino) +{ + struct xfs_inode *ip; + struct xfs_trans *tp; + int error; + + if (ino == NULLFSINO) + return 0; + + error = xfs_iget(mp, NULL, ino, 0, 0, &ip); + if (error) + return error; + + xfs_ilock(ip, XFS_IOLOCK_EXCL); + + tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE); + error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_ITRUNCATE_LOG_COUNT); + if (error) { + xfs_trans_cancel(tp, 0); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + goto out_put; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip); + + error = xfs_itruncate_data(&tp, ip, 0); + if (error) { + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | + XFS_TRANS_ABORT); + goto out_unlock; + } + + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + +out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); +out_put: + IRELE(ip); + return error; +} + +int +xfs_qm_scall_trunc_qfiles( + xfs_mount_t *mp, + uint flags) +{ + int error = 0, error2 = 0; + + if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { + xfs_debug(mp, "%s: flags=%x m_qflags=%x\n", + __func__, flags, mp->m_qflags); + return XFS_ERROR(EINVAL); + } + + if (flags & XFS_DQ_USER) + error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino); + if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) + error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino); + + return error ? error : error2; +} + +/* + * Switch on (a given) quota enforcement for a filesystem. This takes + * effect immediately. + * (Switching on quota accounting must be done at mount time.) + */ +int +xfs_qm_scall_quotaon( + xfs_mount_t *mp, + uint flags) +{ + int error; + uint qf; + __int64_t sbflags; + + flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); + /* + * Switching on quota accounting must be done at mount time. + */ + flags &= ~(XFS_ALL_QUOTA_ACCT); + + sbflags = 0; + + if (flags == 0) { + xfs_debug(mp, "%s: zero flags, m_qflags=%x\n", + __func__, mp->m_qflags); + return XFS_ERROR(EINVAL); + } + + /* No fs can turn on quotas with a delayed effect */ + ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0); + + /* + * Can't enforce without accounting. We check the superblock + * qflags here instead of m_qflags because rootfs can have + * quota acct on ondisk without m_qflags' knowing. + */ + if (((flags & XFS_UQUOTA_ACCT) == 0 && + (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 && + (flags & XFS_UQUOTA_ENFD)) + || + ((flags & XFS_PQUOTA_ACCT) == 0 && + (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && + (flags & XFS_GQUOTA_ACCT) == 0 && + (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && + (flags & XFS_OQUOTA_ENFD))) { + xfs_debug(mp, + "%s: Can't enforce without acct, flags=%x sbflags=%x\n", + __func__, flags, mp->m_sb.sb_qflags); + return XFS_ERROR(EINVAL); + } + /* + * If everything's up to-date incore, then don't waste time. + */ + if ((mp->m_qflags & flags) == flags) + return XFS_ERROR(EEXIST); + + /* + * Change sb_qflags on disk but not incore mp->qflags + * if this is the root filesystem. + */ + spin_lock(&mp->m_sb_lock); + qf = mp->m_sb.sb_qflags; + mp->m_sb.sb_qflags = qf | flags; + spin_unlock(&mp->m_sb_lock); + + /* + * There's nothing to change if it's the same. + */ + if ((qf & flags) == flags && sbflags == 0) + return XFS_ERROR(EEXIST); + sbflags |= XFS_SB_QFLAGS; + + if ((error = xfs_qm_write_sb_changes(mp, sbflags))) + return (error); + /* + * If we aren't trying to switch on quota enforcement, we are done. + */ + if (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) != + (mp->m_qflags & XFS_UQUOTA_ACCT)) || + ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) != + (mp->m_qflags & XFS_PQUOTA_ACCT)) || + ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) != + (mp->m_qflags & XFS_GQUOTA_ACCT)) || + (flags & XFS_ALL_QUOTA_ENFD) == 0) + return (0); + + if (! XFS_IS_QUOTA_RUNNING(mp)) + return XFS_ERROR(ESRCH); + + /* + * Switch on quota enforcement in core. + */ + mutex_lock(&mp->m_quotainfo->qi_quotaofflock); + mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD); + mutex_unlock(&mp->m_quotainfo->qi_quotaofflock); + + return (0); +} + + +/* + * Return quota status information, such as uquota-off, enforcements, etc. + */ +int +xfs_qm_scall_getqstat( + struct xfs_mount *mp, + struct fs_quota_stat *out) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + struct xfs_inode *uip, *gip; + boolean_t tempuqip, tempgqip; + + uip = gip = NULL; + tempuqip = tempgqip = B_FALSE; + memset(out, 0, sizeof(fs_quota_stat_t)); + + out->qs_version = FS_QSTAT_VERSION; + if (!xfs_sb_version_hasquota(&mp->m_sb)) { + out->qs_uquota.qfs_ino = NULLFSINO; + out->qs_gquota.qfs_ino = NULLFSINO; + return (0); + } + out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & + (XFS_ALL_QUOTA_ACCT| + XFS_ALL_QUOTA_ENFD)); + out->qs_pad = 0; + out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino; + out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino; + + if (q) { + uip = q->qi_uquotaip; + gip = q->qi_gquotaip; + } + if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { + if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, + 0, 0, &uip) == 0) + tempuqip = B_TRUE; + } + if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { + if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, + 0, 0, &gip) == 0) + tempgqip = B_TRUE; + } + if (uip) { + out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks; + out->qs_uquota.qfs_nextents = uip->i_d.di_nextents; + if (tempuqip) + IRELE(uip); + } + if (gip) { + out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks; + out->qs_gquota.qfs_nextents = gip->i_d.di_nextents; + if (tempgqip) + IRELE(gip); + } + if (q) { + out->qs_incoredqs = q->qi_dquots; + out->qs_btimelimit = q->qi_btimelimit; + out->qs_itimelimit = q->qi_itimelimit; + out->qs_rtbtimelimit = q->qi_rtbtimelimit; + out->qs_bwarnlimit = q->qi_bwarnlimit; + out->qs_iwarnlimit = q->qi_iwarnlimit; + } + return 0; +} + +#define XFS_DQ_MASK \ + (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK) + +/* + * Adjust quota limits, and start/stop timers accordingly. + */ +int +xfs_qm_scall_setqlim( + xfs_mount_t *mp, + xfs_dqid_t id, + uint type, + fs_disk_quota_t *newlim) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + xfs_disk_dquot_t *ddq; + xfs_dquot_t *dqp; + xfs_trans_t *tp; + int error; + xfs_qcnt_t hard, soft; + + if (newlim->d_fieldmask & ~XFS_DQ_MASK) + return EINVAL; + if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0) + return 0; + + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); + if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, + 0, 0, XFS_DEFAULT_LOG_COUNT))) { + xfs_trans_cancel(tp, 0); + return (error); + } + + /* + * We don't want to race with a quotaoff so take the quotaoff lock. + * (We don't hold an inode lock, so there's nothing else to stop + * a quotaoff from happening). (XXXThis doesn't currently happen + * because we take the vfslock before calling xfs_qm_sysent). + */ + mutex_lock(&q->qi_quotaofflock); + + /* + * Get the dquot (locked), and join it to the transaction. + * Allocate the dquot if this doesn't exist. + */ + if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) { + xfs_trans_cancel(tp, XFS_TRANS_ABORT); + ASSERT(error != ENOENT); + goto out_unlock; + } + xfs_trans_dqjoin(tp, dqp); + ddq = &dqp->q_core; + + /* + * Make sure that hardlimits are >= soft limits before changing. + */ + hard = (newlim->d_fieldmask & FS_DQ_BHARD) ? + (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) : + be64_to_cpu(ddq->d_blk_hardlimit); + soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ? + (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) : + be64_to_cpu(ddq->d_blk_softlimit); + if (hard == 0 || hard >= soft) { + ddq->d_blk_hardlimit = cpu_to_be64(hard); + ddq->d_blk_softlimit = cpu_to_be64(soft); + if (id == 0) { + q->qi_bhardlimit = hard; + q->qi_bsoftlimit = soft; + } + } else { + xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft); + } + hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? + (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : + be64_to_cpu(ddq->d_rtb_hardlimit); + soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ? + (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) : + be64_to_cpu(ddq->d_rtb_softlimit); + if (hard == 0 || hard >= soft) { + ddq->d_rtb_hardlimit = cpu_to_be64(hard); + ddq->d_rtb_softlimit = cpu_to_be64(soft); + if (id == 0) { + q->qi_rtbhardlimit = hard; + q->qi_rtbsoftlimit = soft; + } + } else { + xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft); + } + + hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? + (xfs_qcnt_t) newlim->d_ino_hardlimit : + be64_to_cpu(ddq->d_ino_hardlimit); + soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ? + (xfs_qcnt_t) newlim->d_ino_softlimit : + be64_to_cpu(ddq->d_ino_softlimit); + if (hard == 0 || hard >= soft) { + ddq->d_ino_hardlimit = cpu_to_be64(hard); + ddq->d_ino_softlimit = cpu_to_be64(soft); + if (id == 0) { + q->qi_ihardlimit = hard; + q->qi_isoftlimit = soft; + } + } else { + xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft); + } + + /* + * Update warnings counter(s) if requested + */ + if (newlim->d_fieldmask & FS_DQ_BWARNS) + ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns); + if (newlim->d_fieldmask & FS_DQ_IWARNS) + ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns); + if (newlim->d_fieldmask & FS_DQ_RTBWARNS) + ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns); + + if (id == 0) { + /* + * Timelimits for the super user set the relative time + * the other users can be over quota for this file system. + * If it is zero a default is used. Ditto for the default + * soft and hard limit values (already done, above), and + * for warnings. + */ + if (newlim->d_fieldmask & FS_DQ_BTIMER) { + q->qi_btimelimit = newlim->d_btimer; + ddq->d_btimer = cpu_to_be32(newlim->d_btimer); + } + if (newlim->d_fieldmask & FS_DQ_ITIMER) { + q->qi_itimelimit = newlim->d_itimer; + ddq->d_itimer = cpu_to_be32(newlim->d_itimer); + } + if (newlim->d_fieldmask & FS_DQ_RTBTIMER) { + q->qi_rtbtimelimit = newlim->d_rtbtimer; + ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer); + } + if (newlim->d_fieldmask & FS_DQ_BWARNS) + q->qi_bwarnlimit = newlim->d_bwarns; + if (newlim->d_fieldmask & FS_DQ_IWARNS) + q->qi_iwarnlimit = newlim->d_iwarns; + if (newlim->d_fieldmask & FS_DQ_RTBWARNS) + q->qi_rtbwarnlimit = newlim->d_rtbwarns; + } else { + /* + * If the user is now over quota, start the timelimit. + * The user will not be 'warned'. + * Note that we keep the timers ticking, whether enforcement + * is on or off. We don't really want to bother with iterating + * over all ondisk dquots and turning the timers on/off. + */ + xfs_qm_adjust_dqtimers(mp, ddq); + } + dqp->dq_flags |= XFS_DQ_DIRTY; + xfs_trans_log_dquot(tp, dqp); + + error = xfs_trans_commit(tp, 0); + xfs_qm_dqrele(dqp); + + out_unlock: + mutex_unlock(&q->qi_quotaofflock); + return error; +} + +int +xfs_qm_scall_getquota( + xfs_mount_t *mp, + xfs_dqid_t id, + uint type, + fs_disk_quota_t *out) +{ + xfs_dquot_t *dqp; + int error; + + /* + * Try to get the dquot. We don't want it allocated on disk, so + * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't + * exist, we'll get ENOENT back. + */ + if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) { + return (error); + } + + /* + * If everything's NULL, this dquot doesn't quite exist as far as + * our utility programs are concerned. + */ + if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) { + xfs_qm_dqput(dqp); + return XFS_ERROR(ENOENT); + } + /* + * Convert the disk dquot to the exportable format + */ + xfs_qm_export_dquot(mp, &dqp->q_core, out); + xfs_qm_dqput(dqp); + return (error ? XFS_ERROR(EFAULT) : 0); +} + + +STATIC int +xfs_qm_log_quotaoff_end( + xfs_mount_t *mp, + xfs_qoff_logitem_t *startqoff, + uint flags) +{ + xfs_trans_t *tp; + int error; + xfs_qoff_logitem_t *qoffi; + + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END); + + if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2, + 0, 0, XFS_DEFAULT_LOG_COUNT))) { + xfs_trans_cancel(tp, 0); + return (error); + } + + qoffi = xfs_trans_get_qoff_item(tp, startqoff, + flags & XFS_ALL_QUOTA_ACCT); + xfs_trans_log_quotaoff_item(tp, qoffi); + + /* + * We have to make sure that the transaction is secure on disk before we + * return and actually stop quota accounting. So, make it synchronous. + * We don't care about quotoff's performance. + */ + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0); + return (error); +} + + +STATIC int +xfs_qm_log_quotaoff( + xfs_mount_t *mp, + xfs_qoff_logitem_t **qoffstartp, + uint flags) +{ + xfs_trans_t *tp; + int error; + xfs_qoff_logitem_t *qoffi=NULL; + uint oldsbqflag=0; + + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF); + if ((error = xfs_trans_reserve(tp, 0, + sizeof(xfs_qoff_logitem_t) * 2 + + mp->m_sb.sb_sectsize + 128, + 0, + 0, + XFS_DEFAULT_LOG_COUNT))) { + goto error0; + } + + qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); + xfs_trans_log_quotaoff_item(tp, qoffi); + + spin_lock(&mp->m_sb_lock); + oldsbqflag = mp->m_sb.sb_qflags; + mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; + spin_unlock(&mp->m_sb_lock); + + xfs_mod_sb(tp, XFS_SB_QFLAGS); + + /* + * We have to make sure that the transaction is secure on disk before we + * return and actually stop quota accounting. So, make it synchronous. + * We don't care about quotoff's performance. + */ + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0); + +error0: + if (error) { + xfs_trans_cancel(tp, 0); + /* + * No one else is modifying sb_qflags, so this is OK. + * We still hold the quotaofflock. + */ + spin_lock(&mp->m_sb_lock); + mp->m_sb.sb_qflags = oldsbqflag; + spin_unlock(&mp->m_sb_lock); + } + *qoffstartp = qoffi; + return (error); +} + + +/* + * Translate an internal style on-disk-dquot to the exportable format. + * The main differences are that the counters/limits are all in Basic + * Blocks (BBs) instead of the internal FSBs, and all on-disk data has + * to be converted to the native endianness. + */ +STATIC void +xfs_qm_export_dquot( + xfs_mount_t *mp, + xfs_disk_dquot_t *src, + struct fs_disk_quota *dst) +{ + memset(dst, 0, sizeof(*dst)); + dst->d_version = FS_DQUOT_VERSION; /* different from src->d_version */ + dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags); + dst->d_id = be32_to_cpu(src->d_id); + dst->d_blk_hardlimit = + XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit)); + dst->d_blk_softlimit = + XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit)); + dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit); + dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit); + dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount)); + dst->d_icount = be64_to_cpu(src->d_icount); + dst->d_btimer = be32_to_cpu(src->d_btimer); + dst->d_itimer = be32_to_cpu(src->d_itimer); + dst->d_iwarns = be16_to_cpu(src->d_iwarns); + dst->d_bwarns = be16_to_cpu(src->d_bwarns); + dst->d_rtb_hardlimit = + XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit)); + dst->d_rtb_softlimit = + XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit)); + dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount)); + dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer); + dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns); + + /* + * Internally, we don't reset all the timers when quota enforcement + * gets turned off. No need to confuse the user level code, + * so return zeroes in that case. + */ + if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) || + (!XFS_IS_OQUOTA_ENFORCED(mp) && + (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) { + dst->d_btimer = 0; + dst->d_itimer = 0; + dst->d_rtbtimer = 0; + } + +#ifdef DEBUG + if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) || + (XFS_IS_OQUOTA_ENFORCED(mp) && + (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) && + dst->d_id != 0) { + if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) && + (dst->d_blk_softlimit > 0)) { + ASSERT(dst->d_btimer != 0); + } + if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) && + (dst->d_ino_softlimit > 0)) { + ASSERT(dst->d_itimer != 0); + } + } +#endif +} + +STATIC uint +xfs_qm_export_qtype_flags( + uint flags) +{ + /* + * Can't be more than one, or none. + */ + ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) != + (FS_PROJ_QUOTA | FS_USER_QUOTA)); + ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) != + (FS_PROJ_QUOTA | FS_GROUP_QUOTA)); + ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) != + (FS_USER_QUOTA | FS_GROUP_QUOTA)); + ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0); + + return (flags & XFS_DQ_USER) ? + FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ? + FS_PROJ_QUOTA : FS_GROUP_QUOTA; +} + +STATIC uint +xfs_qm_export_flags( + uint flags) +{ + uint uflags; + + uflags = 0; + if (flags & XFS_UQUOTA_ACCT) + uflags |= FS_QUOTA_UDQ_ACCT; + if (flags & XFS_PQUOTA_ACCT) + uflags |= FS_QUOTA_PDQ_ACCT; + if (flags & XFS_GQUOTA_ACCT) + uflags |= FS_QUOTA_GDQ_ACCT; + if (flags & XFS_UQUOTA_ENFD) + uflags |= FS_QUOTA_UDQ_ENFD; + if (flags & (XFS_OQUOTA_ENFD)) { + uflags |= (flags & XFS_GQUOTA_ACCT) ? + FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD; + } + return (uflags); +} + + +STATIC int +xfs_dqrele_inode( + struct xfs_inode *ip, + struct xfs_perag *pag, + int flags) +{ + /* skip quota inodes */ + if (ip == ip->i_mount->m_quotainfo->qi_uquotaip || + ip == ip->i_mount->m_quotainfo->qi_gquotaip) { + ASSERT(ip->i_udquot == NULL); + ASSERT(ip->i_gdquot == NULL); + return 0; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { + xfs_qm_dqrele(ip->i_udquot); + ip->i_udquot = NULL; + } + if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) { + xfs_qm_dqrele(ip->i_gdquot); + ip->i_gdquot = NULL; + } + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return 0; +} + + +/* + * Go thru all the inodes in the file system, releasing their dquots. + * + * Note that the mount structure gets modified to indicate that quotas are off + * AFTER this, in the case of quotaoff. + */ +void +xfs_qm_dqrele_all_inodes( + struct xfs_mount *mp, + uint flags) +{ + ASSERT(mp->m_quotainfo); + xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags); +} diff --git a/fs/xfs/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h new file mode 100644 index 0000000..94a3d92 --- /dev/null +++ b/fs/xfs/xfs_quota_priv.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_QUOTA_PRIV_H__ +#define __XFS_QUOTA_PRIV_H__ + +/* + * Number of bmaps that we ask from bmapi when doing a quotacheck. + * We make this restriction to keep the memory usage to a minimum. + */ +#define XFS_DQITER_MAP_SIZE 10 + +/* + * Hash into a bucket in the dquot hash table, based on . + */ +#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \ + (__psunsigned_t)(id)) & \ + (xfs_Gqm->qm_dqhashmask - 1)) +#define XFS_DQ_HASH(mp, id, type) (type == XFS_DQ_USER ? \ + (xfs_Gqm->qm_usr_dqhtable + \ + XFS_DQ_HASHVAL(mp, id)) : \ + (xfs_Gqm->qm_grp_dqhtable + \ + XFS_DQ_HASHVAL(mp, id))) +#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ + !dqp->q_core.d_blk_hardlimit && \ + !dqp->q_core.d_blk_softlimit && \ + !dqp->q_core.d_rtb_hardlimit && \ + !dqp->q_core.d_rtb_softlimit && \ + !dqp->q_core.d_ino_hardlimit && \ + !dqp->q_core.d_ino_softlimit && \ + !dqp->q_core.d_bcount && \ + !dqp->q_core.d_rtbcount && \ + !dqp->q_core.d_icount) + +#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ + (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ + (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) + +#endif /* __XFS_QUOTA_PRIV_H__ */ diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c new file mode 100644 index 0000000..7e76f53 --- /dev/null +++ b/fs/xfs/xfs_quotaops.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2008, Christoph Hellwig + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_sb.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_quota.h" +#include "xfs_trans.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_qm.h" +#include + + +STATIC int +xfs_quota_type(int type) +{ + switch (type) { + case USRQUOTA: + return XFS_DQ_USER; + case GRPQUOTA: + return XFS_DQ_GROUP; + default: + return XFS_DQ_PROJ; + } +} + +STATIC int +xfs_fs_get_xstate( + struct super_block *sb, + struct fs_quota_stat *fqs) +{ + struct xfs_mount *mp = XFS_M(sb); + + if (!XFS_IS_QUOTA_RUNNING(mp)) + return -ENOSYS; + return -xfs_qm_scall_getqstat(mp, fqs); +} + +STATIC int +xfs_fs_set_xstate( + struct super_block *sb, + unsigned int uflags, + int op) +{ + struct xfs_mount *mp = XFS_M(sb); + unsigned int flags = 0; + + if (sb->s_flags & MS_RDONLY) + return -EROFS; + if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp)) + return -ENOSYS; + + if (uflags & FS_QUOTA_UDQ_ACCT) + flags |= XFS_UQUOTA_ACCT; + if (uflags & FS_QUOTA_PDQ_ACCT) + flags |= XFS_PQUOTA_ACCT; + if (uflags & FS_QUOTA_GDQ_ACCT) + flags |= XFS_GQUOTA_ACCT; + if (uflags & FS_QUOTA_UDQ_ENFD) + flags |= XFS_UQUOTA_ENFD; + if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD)) + flags |= XFS_OQUOTA_ENFD; + + switch (op) { + case Q_XQUOTAON: + return -xfs_qm_scall_quotaon(mp, flags); + case Q_XQUOTAOFF: + if (!XFS_IS_QUOTA_ON(mp)) + return -EINVAL; + return -xfs_qm_scall_quotaoff(mp, flags); + case Q_XQUOTARM: + if (XFS_IS_QUOTA_ON(mp)) + return -EINVAL; + return -xfs_qm_scall_trunc_qfiles(mp, flags); + } + + return -EINVAL; +} + +STATIC int +xfs_fs_get_dqblk( + struct super_block *sb, + int type, + qid_t id, + struct fs_disk_quota *fdq) +{ + struct xfs_mount *mp = XFS_M(sb); + + if (!XFS_IS_QUOTA_RUNNING(mp)) + return -ENOSYS; + if (!XFS_IS_QUOTA_ON(mp)) + return -ESRCH; + + return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq); +} + +STATIC int +xfs_fs_set_dqblk( + struct super_block *sb, + int type, + qid_t id, + struct fs_disk_quota *fdq) +{ + struct xfs_mount *mp = XFS_M(sb); + + if (sb->s_flags & MS_RDONLY) + return -EROFS; + if (!XFS_IS_QUOTA_RUNNING(mp)) + return -ENOSYS; + if (!XFS_IS_QUOTA_ON(mp)) + return -ESRCH; + + return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq); +} + +const struct quotactl_ops xfs_quotactl_operations = { + .get_xstate = xfs_fs_get_xstate, + .set_xstate = xfs_fs_set_xstate, + .get_dqblk = xfs_fs_get_dqblk, + .set_dqblk = xfs_fs_set_dqblk, +}; diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c new file mode 100644 index 0000000..76fdc58 --- /dev/null +++ b/fs/xfs/xfs_stats.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include + +DEFINE_PER_CPU(struct xfsstats, xfsstats); + +static int xfs_stat_proc_show(struct seq_file *m, void *v) +{ + int c, i, j, val; + __uint64_t xs_xstrat_bytes = 0; + __uint64_t xs_write_bytes = 0; + __uint64_t xs_read_bytes = 0; + + static const struct xstats_entry { + char *desc; + int endpoint; + } xstats[] = { + { "extent_alloc", XFSSTAT_END_EXTENT_ALLOC }, + { "abt", XFSSTAT_END_ALLOC_BTREE }, + { "blk_map", XFSSTAT_END_BLOCK_MAPPING }, + { "bmbt", XFSSTAT_END_BLOCK_MAP_BTREE }, + { "dir", XFSSTAT_END_DIRECTORY_OPS }, + { "trans", XFSSTAT_END_TRANSACTIONS }, + { "ig", XFSSTAT_END_INODE_OPS }, + { "log", XFSSTAT_END_LOG_OPS }, + { "push_ail", XFSSTAT_END_TAIL_PUSHING }, + { "xstrat", XFSSTAT_END_WRITE_CONVERT }, + { "rw", XFSSTAT_END_READ_WRITE_OPS }, + { "attr", XFSSTAT_END_ATTRIBUTE_OPS }, + { "icluster", XFSSTAT_END_INODE_CLUSTER }, + { "vnodes", XFSSTAT_END_VNODE_OPS }, + { "buf", XFSSTAT_END_BUF }, + { "abtb2", XFSSTAT_END_ABTB_V2 }, + { "abtc2", XFSSTAT_END_ABTC_V2 }, + { "bmbt2", XFSSTAT_END_BMBT_V2 }, + { "ibt2", XFSSTAT_END_IBT_V2 }, + }; + + /* Loop over all stats groups */ + for (i=j = 0; i < ARRAY_SIZE(xstats); i++) { + seq_printf(m, "%s", xstats[i].desc); + /* inner loop does each group */ + while (j < xstats[i].endpoint) { + val = 0; + /* sum over all cpus */ + for_each_possible_cpu(c) + val += *(((__u32*)&per_cpu(xfsstats, c) + j)); + seq_printf(m, " %u", val); + j++; + } + seq_putc(m, '\n'); + } + /* extra precision counters */ + for_each_possible_cpu(i) { + xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes; + xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes; + xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; + } + + seq_printf(m, "xpc %Lu %Lu %Lu\n", + xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); + seq_printf(m, "debug %u\n", +#if defined(DEBUG) + 1); +#else + 0); +#endif + return 0; +} + +static int xfs_stat_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, xfs_stat_proc_show, NULL); +} + +static const struct file_operations xfs_stat_proc_fops = { + .owner = THIS_MODULE, + .open = xfs_stat_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int +xfs_init_procfs(void) +{ + if (!proc_mkdir("fs/xfs", NULL)) + goto out; + + if (!proc_create("fs/xfs/stat", 0, NULL, + &xfs_stat_proc_fops)) + goto out_remove_entry; + return 0; + + out_remove_entry: + remove_proc_entry("fs/xfs", NULL); + out: + return -ENOMEM; +} + +void +xfs_cleanup_procfs(void) +{ + remove_proc_entry("fs/xfs/stat", NULL); + remove_proc_entry("fs/xfs", NULL); +} diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h new file mode 100644 index 0000000..736854b --- /dev/null +++ b/fs/xfs/xfs_stats.h @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2000,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_STATS_H__ +#define __XFS_STATS_H__ + + +#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF) + +#include + +/* + * XFS global statistics + */ +struct xfsstats { +# define XFSSTAT_END_EXTENT_ALLOC 4 + __uint32_t xs_allocx; + __uint32_t xs_allocb; + __uint32_t xs_freex; + __uint32_t xs_freeb; +# define XFSSTAT_END_ALLOC_BTREE (XFSSTAT_END_EXTENT_ALLOC+4) + __uint32_t xs_abt_lookup; + __uint32_t xs_abt_compare; + __uint32_t xs_abt_insrec; + __uint32_t xs_abt_delrec; +# define XFSSTAT_END_BLOCK_MAPPING (XFSSTAT_END_ALLOC_BTREE+7) + __uint32_t xs_blk_mapr; + __uint32_t xs_blk_mapw; + __uint32_t xs_blk_unmap; + __uint32_t xs_add_exlist; + __uint32_t xs_del_exlist; + __uint32_t xs_look_exlist; + __uint32_t xs_cmp_exlist; +# define XFSSTAT_END_BLOCK_MAP_BTREE (XFSSTAT_END_BLOCK_MAPPING+4) + __uint32_t xs_bmbt_lookup; + __uint32_t xs_bmbt_compare; + __uint32_t xs_bmbt_insrec; + __uint32_t xs_bmbt_delrec; +# define XFSSTAT_END_DIRECTORY_OPS (XFSSTAT_END_BLOCK_MAP_BTREE+4) + __uint32_t xs_dir_lookup; + __uint32_t xs_dir_create; + __uint32_t xs_dir_remove; + __uint32_t xs_dir_getdents; +# define XFSSTAT_END_TRANSACTIONS (XFSSTAT_END_DIRECTORY_OPS+3) + __uint32_t xs_trans_sync; + __uint32_t xs_trans_async; + __uint32_t xs_trans_empty; +# define XFSSTAT_END_INODE_OPS (XFSSTAT_END_TRANSACTIONS+7) + __uint32_t xs_ig_attempts; + __uint32_t xs_ig_found; + __uint32_t xs_ig_frecycle; + __uint32_t xs_ig_missed; + __uint32_t xs_ig_dup; + __uint32_t xs_ig_reclaims; + __uint32_t xs_ig_attrchg; +# define XFSSTAT_END_LOG_OPS (XFSSTAT_END_INODE_OPS+5) + __uint32_t xs_log_writes; + __uint32_t xs_log_blocks; + __uint32_t xs_log_noiclogs; + __uint32_t xs_log_force; + __uint32_t xs_log_force_sleep; +# define XFSSTAT_END_TAIL_PUSHING (XFSSTAT_END_LOG_OPS+10) + __uint32_t xs_try_logspace; + __uint32_t xs_sleep_logspace; + __uint32_t xs_push_ail; + __uint32_t xs_push_ail_success; + __uint32_t xs_push_ail_pushbuf; + __uint32_t xs_push_ail_pinned; + __uint32_t xs_push_ail_locked; + __uint32_t xs_push_ail_flushing; + __uint32_t xs_push_ail_restarts; + __uint32_t xs_push_ail_flush; +# define XFSSTAT_END_WRITE_CONVERT (XFSSTAT_END_TAIL_PUSHING+2) + __uint32_t xs_xstrat_quick; + __uint32_t xs_xstrat_split; +# define XFSSTAT_END_READ_WRITE_OPS (XFSSTAT_END_WRITE_CONVERT+2) + __uint32_t xs_write_calls; + __uint32_t xs_read_calls; +# define XFSSTAT_END_ATTRIBUTE_OPS (XFSSTAT_END_READ_WRITE_OPS+4) + __uint32_t xs_attr_get; + __uint32_t xs_attr_set; + __uint32_t xs_attr_remove; + __uint32_t xs_attr_list; +# define XFSSTAT_END_INODE_CLUSTER (XFSSTAT_END_ATTRIBUTE_OPS+3) + __uint32_t xs_iflush_count; + __uint32_t xs_icluster_flushcnt; + __uint32_t xs_icluster_flushinode; +# define XFSSTAT_END_VNODE_OPS (XFSSTAT_END_INODE_CLUSTER+8) + __uint32_t vn_active; /* # vnodes not on free lists */ + __uint32_t vn_alloc; /* # times vn_alloc called */ + __uint32_t vn_get; /* # times vn_get called */ + __uint32_t vn_hold; /* # times vn_hold called */ + __uint32_t vn_rele; /* # times vn_rele called */ + __uint32_t vn_reclaim; /* # times vn_reclaim called */ + __uint32_t vn_remove; /* # times vn_remove called */ + __uint32_t vn_free; /* # times vn_free called */ +#define XFSSTAT_END_BUF (XFSSTAT_END_VNODE_OPS+9) + __uint32_t xb_get; + __uint32_t xb_create; + __uint32_t xb_get_locked; + __uint32_t xb_get_locked_waited; + __uint32_t xb_busy_locked; + __uint32_t xb_miss_locked; + __uint32_t xb_page_retries; + __uint32_t xb_page_found; + __uint32_t xb_get_read; +/* Version 2 btree counters */ +#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF+15) + __uint32_t xs_abtb_2_lookup; + __uint32_t xs_abtb_2_compare; + __uint32_t xs_abtb_2_insrec; + __uint32_t xs_abtb_2_delrec; + __uint32_t xs_abtb_2_newroot; + __uint32_t xs_abtb_2_killroot; + __uint32_t xs_abtb_2_increment; + __uint32_t xs_abtb_2_decrement; + __uint32_t xs_abtb_2_lshift; + __uint32_t xs_abtb_2_rshift; + __uint32_t xs_abtb_2_split; + __uint32_t xs_abtb_2_join; + __uint32_t xs_abtb_2_alloc; + __uint32_t xs_abtb_2_free; + __uint32_t xs_abtb_2_moves; +#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2+15) + __uint32_t xs_abtc_2_lookup; + __uint32_t xs_abtc_2_compare; + __uint32_t xs_abtc_2_insrec; + __uint32_t xs_abtc_2_delrec; + __uint32_t xs_abtc_2_newroot; + __uint32_t xs_abtc_2_killroot; + __uint32_t xs_abtc_2_increment; + __uint32_t xs_abtc_2_decrement; + __uint32_t xs_abtc_2_lshift; + __uint32_t xs_abtc_2_rshift; + __uint32_t xs_abtc_2_split; + __uint32_t xs_abtc_2_join; + __uint32_t xs_abtc_2_alloc; + __uint32_t xs_abtc_2_free; + __uint32_t xs_abtc_2_moves; +#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2+15) + __uint32_t xs_bmbt_2_lookup; + __uint32_t xs_bmbt_2_compare; + __uint32_t xs_bmbt_2_insrec; + __uint32_t xs_bmbt_2_delrec; + __uint32_t xs_bmbt_2_newroot; + __uint32_t xs_bmbt_2_killroot; + __uint32_t xs_bmbt_2_increment; + __uint32_t xs_bmbt_2_decrement; + __uint32_t xs_bmbt_2_lshift; + __uint32_t xs_bmbt_2_rshift; + __uint32_t xs_bmbt_2_split; + __uint32_t xs_bmbt_2_join; + __uint32_t xs_bmbt_2_alloc; + __uint32_t xs_bmbt_2_free; + __uint32_t xs_bmbt_2_moves; +#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2+15) + __uint32_t xs_ibt_2_lookup; + __uint32_t xs_ibt_2_compare; + __uint32_t xs_ibt_2_insrec; + __uint32_t xs_ibt_2_delrec; + __uint32_t xs_ibt_2_newroot; + __uint32_t xs_ibt_2_killroot; + __uint32_t xs_ibt_2_increment; + __uint32_t xs_ibt_2_decrement; + __uint32_t xs_ibt_2_lshift; + __uint32_t xs_ibt_2_rshift; + __uint32_t xs_ibt_2_split; + __uint32_t xs_ibt_2_join; + __uint32_t xs_ibt_2_alloc; + __uint32_t xs_ibt_2_free; + __uint32_t xs_ibt_2_moves; +/* Extra precision counters */ + __uint64_t xs_xstrat_bytes; + __uint64_t xs_write_bytes; + __uint64_t xs_read_bytes; +}; + +DECLARE_PER_CPU(struct xfsstats, xfsstats); + +/* + * We don't disable preempt, not too worried about poking the + * wrong CPU's stat for now (also aggregated before reporting). + */ +#define XFS_STATS_INC(v) (per_cpu(xfsstats, current_cpu()).v++) +#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--) +#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc)) + +extern int xfs_init_procfs(void); +extern void xfs_cleanup_procfs(void); + + +#else /* !CONFIG_PROC_FS */ + +# define XFS_STATS_INC(count) +# define XFS_STATS_DEC(count) +# define XFS_STATS_ADD(count, inc) + +static inline int xfs_init_procfs(void) +{ + return 0; +} + +static inline void xfs_cleanup_procfs(void) +{ +} + +#endif /* !CONFIG_PROC_FS */ + +#endif /* __XFS_STATS_H__ */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c new file mode 100644 index 0000000..9a72dda --- /dev/null +++ b/fs/xfs/xfs_super.c @@ -0,0 +1,1773 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "xfs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_fsops.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_utils.h" +#include "xfs_vnodeops.h" +#include "xfs_log_priv.h" +#include "xfs_trans_priv.h" +#include "xfs_filestream.h" +#include "xfs_da_btree.h" +#include "xfs_extfree_item.h" +#include "xfs_mru_cache.h" +#include "xfs_inode_item.h" +#include "xfs_sync.h" +#include "xfs_trace.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const struct super_operations xfs_super_operations; +static kmem_zone_t *xfs_ioend_zone; +mempool_t *xfs_ioend_pool; + +#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ +#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ +#define MNTOPT_LOGDEV "logdev" /* log device */ +#define MNTOPT_RTDEV "rtdev" /* realtime I/O device */ +#define MNTOPT_BIOSIZE "biosize" /* log2 of preferred buffered io size */ +#define MNTOPT_WSYNC "wsync" /* safe-mode nfs compatible mount */ +#define MNTOPT_NOALIGN "noalign" /* turn off stripe alignment */ +#define MNTOPT_SWALLOC "swalloc" /* turn on stripe width allocation */ +#define MNTOPT_SUNIT "sunit" /* data volume stripe unit */ +#define MNTOPT_SWIDTH "swidth" /* data volume stripe width */ +#define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */ +#define MNTOPT_MTPT "mtpt" /* filesystem mount point */ +#define MNTOPT_GRPID "grpid" /* group-ID from parent directory */ +#define MNTOPT_NOGRPID "nogrpid" /* group-ID from current process */ +#define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */ +#define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */ +#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ +#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ +#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and + * unwritten extent conversion */ +#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */ +#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ +#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ +#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ +#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */ +#define MNTOPT_NOLARGEIO "nolargeio" /* do not report large I/O sizes + * in stat(). */ +#define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */ +#define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */ +#define MNTOPT_FILESTREAM "filestreams" /* use filestreams allocator */ +#define MNTOPT_QUOTA "quota" /* disk quotas (user) */ +#define MNTOPT_NOQUOTA "noquota" /* no quotas */ +#define MNTOPT_USRQUOTA "usrquota" /* user quota enabled */ +#define MNTOPT_GRPQUOTA "grpquota" /* group quota enabled */ +#define MNTOPT_PRJQUOTA "prjquota" /* project quota enabled */ +#define MNTOPT_UQUOTA "uquota" /* user quota (IRIX variant) */ +#define MNTOPT_GQUOTA "gquota" /* group quota (IRIX variant) */ +#define MNTOPT_PQUOTA "pquota" /* project quota (IRIX variant) */ +#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */ +#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ +#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ +#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ +#define MNTOPT_DELAYLOG "delaylog" /* Delayed logging enabled */ +#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */ +#define MNTOPT_DISCARD "discard" /* Discard unused blocks */ +#define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */ + +/* + * Table driven mount option parser. + * + * Currently only used for remount, but it will be used for mount + * in the future, too. + */ +enum { + Opt_barrier, Opt_nobarrier, Opt_err +}; + +static const match_table_t tokens = { + {Opt_barrier, "barrier"}, + {Opt_nobarrier, "nobarrier"}, + {Opt_err, NULL} +}; + + +STATIC unsigned long +suffix_strtoul(char *s, char **endp, unsigned int base) +{ + int last, shift_left_factor = 0; + char *value = s; + + last = strlen(value) - 1; + if (value[last] == 'K' || value[last] == 'k') { + shift_left_factor = 10; + value[last] = '\0'; + } + if (value[last] == 'M' || value[last] == 'm') { + shift_left_factor = 20; + value[last] = '\0'; + } + if (value[last] == 'G' || value[last] == 'g') { + shift_left_factor = 30; + value[last] = '\0'; + } + + return simple_strtoul((const char *)s, endp, base) << shift_left_factor; +} + +/* + * This function fills in xfs_mount_t fields based on mount args. + * Note: the superblock has _not_ yet been read in. + * + * Note that this function leaks the various device name allocations on + * failure. The caller takes care of them. + */ +STATIC int +xfs_parseargs( + struct xfs_mount *mp, + char *options) +{ + struct super_block *sb = mp->m_super; + char *this_char, *value, *eov; + int dsunit = 0; + int dswidth = 0; + int iosize = 0; + __uint8_t iosizelog = 0; + + /* + * set up the mount name first so all the errors will refer to the + * correct device. + */ + mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); + if (!mp->m_fsname) + return ENOMEM; + mp->m_fsname_len = strlen(mp->m_fsname) + 1; + + /* + * Copy binary VFS mount flags we are interested in. + */ + if (sb->s_flags & MS_RDONLY) + mp->m_flags |= XFS_MOUNT_RDONLY; + if (sb->s_flags & MS_DIRSYNC) + mp->m_flags |= XFS_MOUNT_DIRSYNC; + if (sb->s_flags & MS_SYNCHRONOUS) + mp->m_flags |= XFS_MOUNT_WSYNC; + + /* + * Set some default flags that could be cleared by the mount option + * parsing. + */ + mp->m_flags |= XFS_MOUNT_BARRIER; + mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; + mp->m_flags |= XFS_MOUNT_SMALL_INUMS; + mp->m_flags |= XFS_MOUNT_DELAYLOG; + + /* + * These can be overridden by the mount option parsing. + */ + mp->m_logbufs = -1; + mp->m_logbsize = -1; + + if (!options) + goto done; + + while ((this_char = strsep(&options, ",")) != NULL) { + if (!*this_char) + continue; + if ((value = strchr(this_char, '=')) != NULL) + *value++ = 0; + + if (!strcmp(this_char, MNTOPT_LOGBUFS)) { + if (!value || !*value) { + xfs_warn(mp, "%s option requires an argument", + this_char); + return EINVAL; + } + mp->m_logbufs = simple_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { + if (!value || !*value) { + xfs_warn(mp, "%s option requires an argument", + this_char); + return EINVAL; + } + mp->m_logbsize = suffix_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { + if (!value || !*value) { + xfs_warn(mp, "%s option requires an argument", + this_char); + return EINVAL; + } + mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); + if (!mp->m_logname) + return ENOMEM; + } else if (!strcmp(this_char, MNTOPT_MTPT)) { + xfs_warn(mp, "%s option not allowed on this system", + this_char); + return EINVAL; + } else if (!strcmp(this_char, MNTOPT_RTDEV)) { + if (!value || !*value) { + xfs_warn(mp, "%s option requires an argument", + this_char); + return EINVAL; + } + mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); + if (!mp->m_rtname) + return ENOMEM; + } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { + if (!value || !*value) { + xfs_warn(mp, "%s option requires an argument", + this_char); + return EINVAL; + } + iosize = simple_strtoul(value, &eov, 10); + iosizelog = ffs(iosize) - 1; + } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { + if (!value || !*value) { + xfs_warn(mp, "%s option requires an argument", + this_char); + return EINVAL; + } + iosize = suffix_strtoul(value, &eov, 10); + iosizelog = ffs(iosize) - 1; + } else if (!strcmp(this_char, MNTOPT_GRPID) || + !strcmp(this_char, MNTOPT_BSDGROUPS)) { + mp->m_flags |= XFS_MOUNT_GRPID; + } else if (!strcmp(this_char, MNTOPT_NOGRPID) || + !strcmp(this_char, MNTOPT_SYSVGROUPS)) { + mp->m_flags &= ~XFS_MOUNT_GRPID; + } else if (!strcmp(this_char, MNTOPT_WSYNC)) { + mp->m_flags |= XFS_MOUNT_WSYNC; + } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { + mp->m_flags |= XFS_MOUNT_NORECOVERY; + } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { + mp->m_flags |= XFS_MOUNT_NOALIGN; + } else if (!strcmp(this_char, MNTOPT_SWALLOC)) { + mp->m_flags |= XFS_MOUNT_SWALLOC; + } else if (!strcmp(this_char, MNTOPT_SUNIT)) { + if (!value || !*value) { + xfs_warn(mp, "%s option requires an argument", + this_char); + return EINVAL; + } + dsunit = simple_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { + if (!value || !*value) { + xfs_warn(mp, "%s option requires an argument", + this_char); + return EINVAL; + } + dswidth = simple_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { + mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; +#if !XFS_BIG_INUMS + xfs_warn(mp, "%s option not allowed on this system", + this_char); + return EINVAL; +#endif + } else if (!strcmp(this_char, MNTOPT_NOUUID)) { + mp->m_flags |= XFS_MOUNT_NOUUID; + } else if (!strcmp(this_char, MNTOPT_BARRIER)) { + mp->m_flags |= XFS_MOUNT_BARRIER; + } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) { + mp->m_flags &= ~XFS_MOUNT_BARRIER; + } else if (!strcmp(this_char, MNTOPT_IKEEP)) { + mp->m_flags |= XFS_MOUNT_IKEEP; + } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { + mp->m_flags &= ~XFS_MOUNT_IKEEP; + } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { + mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE; + } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) { + mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; + } else if (!strcmp(this_char, MNTOPT_ATTR2)) { + mp->m_flags |= XFS_MOUNT_ATTR2; + } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { + mp->m_flags &= ~XFS_MOUNT_ATTR2; + mp->m_flags |= XFS_MOUNT_NOATTR2; + } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { + mp->m_flags |= XFS_MOUNT_FILESTREAMS; + } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { + mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | + XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | + XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | + XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD); + } else if (!strcmp(this_char, MNTOPT_QUOTA) || + !strcmp(this_char, MNTOPT_UQUOTA) || + !strcmp(this_char, MNTOPT_USRQUOTA)) { + mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | + XFS_UQUOTA_ENFD); + } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) || + !strcmp(this_char, MNTOPT_UQUOTANOENF)) { + mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); + mp->m_qflags &= ~XFS_UQUOTA_ENFD; + } else if (!strcmp(this_char, MNTOPT_PQUOTA) || + !strcmp(this_char, MNTOPT_PRJQUOTA)) { + mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | + XFS_OQUOTA_ENFD); + } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { + mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); + mp->m_qflags &= ~XFS_OQUOTA_ENFD; + } else if (!strcmp(this_char, MNTOPT_GQUOTA) || + !strcmp(this_char, MNTOPT_GRPQUOTA)) { + mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | + XFS_OQUOTA_ENFD); + } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { + mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); + mp->m_qflags &= ~XFS_OQUOTA_ENFD; + } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { + mp->m_flags |= XFS_MOUNT_DELAYLOG; + } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { + mp->m_flags &= ~XFS_MOUNT_DELAYLOG; + } else if (!strcmp(this_char, MNTOPT_DISCARD)) { + mp->m_flags |= XFS_MOUNT_DISCARD; + } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { + mp->m_flags &= ~XFS_MOUNT_DISCARD; + } else if (!strcmp(this_char, "ihashsize")) { + xfs_warn(mp, + "ihashsize no longer used, option is deprecated."); + } else if (!strcmp(this_char, "osyncisdsync")) { + xfs_warn(mp, + "osyncisdsync has no effect, option is deprecated."); + } else if (!strcmp(this_char, "osyncisosync")) { + xfs_warn(mp, + "osyncisosync has no effect, option is deprecated."); + } else if (!strcmp(this_char, "irixsgid")) { + xfs_warn(mp, + "irixsgid is now a sysctl(2) variable, option is deprecated."); + } else { + xfs_warn(mp, "unknown mount option [%s].", this_char); + return EINVAL; + } + } + + /* + * no recovery flag requires a read-only mount + */ + if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && + !(mp->m_flags & XFS_MOUNT_RDONLY)) { + xfs_warn(mp, "no-recovery mounts must be read-only."); + return EINVAL; + } + + if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { + xfs_warn(mp, + "sunit and swidth options incompatible with the noalign option"); + return EINVAL; + } + + if ((mp->m_flags & XFS_MOUNT_DISCARD) && + !(mp->m_flags & XFS_MOUNT_DELAYLOG)) { + xfs_warn(mp, + "the discard option is incompatible with the nodelaylog option"); + return EINVAL; + } + +#ifndef CONFIG_XFS_QUOTA + if (XFS_IS_QUOTA_RUNNING(mp)) { + xfs_warn(mp, "quota support not available in this kernel."); + return EINVAL; + } +#endif + + if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && + (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { + xfs_warn(mp, "cannot mount with both project and group quota"); + return EINVAL; + } + + if ((dsunit && !dswidth) || (!dsunit && dswidth)) { + xfs_warn(mp, "sunit and swidth must be specified together"); + return EINVAL; + } + + if (dsunit && (dswidth % dsunit != 0)) { + xfs_warn(mp, + "stripe width (%d) must be a multiple of the stripe unit (%d)", + dswidth, dsunit); + return EINVAL; + } + +done: + if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) { + /* + * At this point the superblock has not been read + * in, therefore we do not know the block size. + * Before the mount call ends we will convert + * these to FSBs. + */ + if (dsunit) { + mp->m_dalign = dsunit; + mp->m_flags |= XFS_MOUNT_RETERR; + } + + if (dswidth) + mp->m_swidth = dswidth; + } + + if (mp->m_logbufs != -1 && + mp->m_logbufs != 0 && + (mp->m_logbufs < XLOG_MIN_ICLOGS || + mp->m_logbufs > XLOG_MAX_ICLOGS)) { + xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", + mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); + return XFS_ERROR(EINVAL); + } + if (mp->m_logbsize != -1 && + mp->m_logbsize != 0 && + (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || + mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || + !is_power_of_2(mp->m_logbsize))) { + xfs_warn(mp, + "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", + mp->m_logbsize); + return XFS_ERROR(EINVAL); + } + + if (iosizelog) { + if (iosizelog > XFS_MAX_IO_LOG || + iosizelog < XFS_MIN_IO_LOG) { + xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", + iosizelog, XFS_MIN_IO_LOG, + XFS_MAX_IO_LOG); + return XFS_ERROR(EINVAL); + } + + mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE; + mp->m_readio_log = iosizelog; + mp->m_writeio_log = iosizelog; + } + + return 0; +} + +struct proc_xfs_info { + int flag; + char *str; +}; + +STATIC int +xfs_showargs( + struct xfs_mount *mp, + struct seq_file *m) +{ + static struct proc_xfs_info xfs_info_set[] = { + /* the few simple ones we can get from the mount struct */ + { XFS_MOUNT_IKEEP, "," MNTOPT_IKEEP }, + { XFS_MOUNT_WSYNC, "," MNTOPT_WSYNC }, + { XFS_MOUNT_NOALIGN, "," MNTOPT_NOALIGN }, + { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC }, + { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, + { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY }, + { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 }, + { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, + { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, + { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, + { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, + { 0, NULL } + }; + static struct proc_xfs_info xfs_info_unset[] = { + /* the few simple ones we can get from the mount struct */ + { XFS_MOUNT_COMPAT_IOSIZE, "," MNTOPT_LARGEIO }, + { XFS_MOUNT_BARRIER, "," MNTOPT_NOBARRIER }, + { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_64BITINODE }, + { 0, NULL } + }; + struct proc_xfs_info *xfs_infop; + + for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) { + if (mp->m_flags & xfs_infop->flag) + seq_puts(m, xfs_infop->str); + } + for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) { + if (!(mp->m_flags & xfs_infop->flag)) + seq_puts(m, xfs_infop->str); + } + + if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) + seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk", + (int)(1 << mp->m_writeio_log) >> 10); + + if (mp->m_logbufs > 0) + seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs); + if (mp->m_logbsize > 0) + seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10); + + if (mp->m_logname) + seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname); + if (mp->m_rtname) + seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname); + + if (mp->m_dalign > 0) + seq_printf(m, "," MNTOPT_SUNIT "=%d", + (int)XFS_FSB_TO_BB(mp, mp->m_dalign)); + if (mp->m_swidth > 0) + seq_printf(m, "," MNTOPT_SWIDTH "=%d", + (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); + + if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD)) + seq_puts(m, "," MNTOPT_USRQUOTA); + else if (mp->m_qflags & XFS_UQUOTA_ACCT) + seq_puts(m, "," MNTOPT_UQUOTANOENF); + + /* Either project or group quotas can be active, not both */ + + if (mp->m_qflags & XFS_PQUOTA_ACCT) { + if (mp->m_qflags & XFS_OQUOTA_ENFD) + seq_puts(m, "," MNTOPT_PRJQUOTA); + else + seq_puts(m, "," MNTOPT_PQUOTANOENF); + } else if (mp->m_qflags & XFS_GQUOTA_ACCT) { + if (mp->m_qflags & XFS_OQUOTA_ENFD) + seq_puts(m, "," MNTOPT_GRPQUOTA); + else + seq_puts(m, "," MNTOPT_GQUOTANOENF); + } + + if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) + seq_puts(m, "," MNTOPT_NOQUOTA); + + return 0; +} +__uint64_t +xfs_max_file_offset( + unsigned int blockshift) +{ + unsigned int pagefactor = 1; + unsigned int bitshift = BITS_PER_LONG - 1; + + /* Figure out maximum filesize, on Linux this can depend on + * the filesystem blocksize (on 32 bit platforms). + * __block_write_begin does this in an [unsigned] long... + * page->index << (PAGE_CACHE_SHIFT - bbits) + * So, for page sized blocks (4K on 32 bit platforms), + * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is + * (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) + * but for smaller blocksizes it is less (bbits = log2 bsize). + * Note1: get_block_t takes a long (implicit cast from above) + * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch + * can optionally convert the [unsigned] long from above into + * an [unsigned] long long. + */ + +#if BITS_PER_LONG == 32 +# if defined(CONFIG_LBDAF) + ASSERT(sizeof(sector_t) == 8); + pagefactor = PAGE_CACHE_SIZE; + bitshift = BITS_PER_LONG; +# else + pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift); +# endif +#endif + + return (((__uint64_t)pagefactor) << bitshift) - 1; +} + +STATIC int +xfs_blkdev_get( + xfs_mount_t *mp, + const char *name, + struct block_device **bdevp) +{ + int error = 0; + + *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL, + mp); + if (IS_ERR(*bdevp)) { + error = PTR_ERR(*bdevp); + xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error); + } + + return -error; +} + +STATIC void +xfs_blkdev_put( + struct block_device *bdev) +{ + if (bdev) + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); +} + +void +xfs_blkdev_issue_flush( + xfs_buftarg_t *buftarg) +{ + blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL); +} + +STATIC void +xfs_close_devices( + struct xfs_mount *mp) +{ + if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { + struct block_device *logdev = mp->m_logdev_targp->bt_bdev; + xfs_free_buftarg(mp, mp->m_logdev_targp); + xfs_blkdev_put(logdev); + } + if (mp->m_rtdev_targp) { + struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev; + xfs_free_buftarg(mp, mp->m_rtdev_targp); + xfs_blkdev_put(rtdev); + } + xfs_free_buftarg(mp, mp->m_ddev_targp); +} + +/* + * The file system configurations are: + * (1) device (partition) with data and internal log + * (2) logical volume with data and log subvolumes. + * (3) logical volume with data, log, and realtime subvolumes. + * + * We only have to handle opening the log and realtime volumes here if + * they are present. The data subvolume has already been opened by + * get_sb_bdev() and is stored in sb->s_bdev. + */ +STATIC int +xfs_open_devices( + struct xfs_mount *mp) +{ + struct block_device *ddev = mp->m_super->s_bdev; + struct block_device *logdev = NULL, *rtdev = NULL; + int error; + + /* + * Open real time and log devices - order is important. + */ + if (mp->m_logname) { + error = xfs_blkdev_get(mp, mp->m_logname, &logdev); + if (error) + goto out; + } + + if (mp->m_rtname) { + error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev); + if (error) + goto out_close_logdev; + + if (rtdev == ddev || rtdev == logdev) { + xfs_warn(mp, + "Cannot mount filesystem with identical rtdev and ddev/logdev."); + error = EINVAL; + goto out_close_rtdev; + } + } + + /* + * Setup xfs_mount buffer target pointers + */ + error = ENOMEM; + mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname); + if (!mp->m_ddev_targp) + goto out_close_rtdev; + + if (rtdev) { + mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1, + mp->m_fsname); + if (!mp->m_rtdev_targp) + goto out_free_ddev_targ; + } + + if (logdev && logdev != ddev) { + mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1, + mp->m_fsname); + if (!mp->m_logdev_targp) + goto out_free_rtdev_targ; + } else { + mp->m_logdev_targp = mp->m_ddev_targp; + } + + return 0; + + out_free_rtdev_targ: + if (mp->m_rtdev_targp) + xfs_free_buftarg(mp, mp->m_rtdev_targp); + out_free_ddev_targ: + xfs_free_buftarg(mp, mp->m_ddev_targp); + out_close_rtdev: + if (rtdev) + xfs_blkdev_put(rtdev); + out_close_logdev: + if (logdev && logdev != ddev) + xfs_blkdev_put(logdev); + out: + return error; +} + +/* + * Setup xfs_mount buffer target pointers based on superblock + */ +STATIC int +xfs_setup_devices( + struct xfs_mount *mp) +{ + int error; + + error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize, + mp->m_sb.sb_sectsize); + if (error) + return error; + + if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { + unsigned int log_sector_size = BBSIZE; + + if (xfs_sb_version_hassector(&mp->m_sb)) + log_sector_size = mp->m_sb.sb_logsectsize; + error = xfs_setsize_buftarg(mp->m_logdev_targp, + mp->m_sb.sb_blocksize, + log_sector_size); + if (error) + return error; + } + if (mp->m_rtdev_targp) { + error = xfs_setsize_buftarg(mp->m_rtdev_targp, + mp->m_sb.sb_blocksize, + mp->m_sb.sb_sectsize); + if (error) + return error; + } + + return 0; +} + +/* Catch misguided souls that try to use this interface on XFS */ +STATIC struct inode * +xfs_fs_alloc_inode( + struct super_block *sb) +{ + BUG(); + return NULL; +} + +/* + * Now that the generic code is guaranteed not to be accessing + * the linux inode, we can reclaim the inode. + */ +STATIC void +xfs_fs_destroy_inode( + struct inode *inode) +{ + struct xfs_inode *ip = XFS_I(inode); + + trace_xfs_destroy_inode(ip); + + XFS_STATS_INC(vn_reclaim); + + /* bad inode, get out here ASAP */ + if (is_bad_inode(inode)) + goto out_reclaim; + + xfs_ioend_wait(ip); + + ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); + + /* + * We should never get here with one of the reclaim flags already set. + */ + ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE)); + ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); + + /* + * We always use background reclaim here because even if the + * inode is clean, it still may be under IO and hence we have + * to take the flush lock. The background reclaim path handles + * this more efficiently than we can here, so simply let background + * reclaim tear down all inodes. + */ +out_reclaim: + xfs_inode_set_reclaim_tag(ip); +} + +/* + * Slab object creation initialisation for the XFS inode. + * This covers only the idempotent fields in the XFS inode; + * all other fields need to be initialised on allocation + * from the slab. This avoids the need to repeatedly initialise + * fields in the xfs inode that left in the initialise state + * when freeing the inode. + */ +STATIC void +xfs_fs_inode_init_once( + void *inode) +{ + struct xfs_inode *ip = inode; + + memset(ip, 0, sizeof(struct xfs_inode)); + + /* vfs inode */ + inode_init_once(VFS_I(ip)); + + /* xfs inode */ + atomic_set(&ip->i_iocount, 0); + atomic_set(&ip->i_pincount, 0); + spin_lock_init(&ip->i_flags_lock); + init_waitqueue_head(&ip->i_ipin_wait); + /* + * Because we want to use a counting completion, complete + * the flush completion once to allow a single access to + * the flush completion without blocking. + */ + init_completion(&ip->i_flush); + complete(&ip->i_flush); + + mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, + "xfsino", ip->i_ino); +} + +/* + * Dirty the XFS inode when mark_inode_dirty_sync() is called so that + * we catch unlogged VFS level updates to the inode. + * + * We need the barrier() to maintain correct ordering between unlogged + * updates and the transaction commit code that clears the i_update_core + * field. This requires all updates to be completed before marking the + * inode dirty. + */ +STATIC void +xfs_fs_dirty_inode( + struct inode *inode, + int flags) +{ + barrier(); + XFS_I(inode)->i_update_core = 1; +} + +STATIC int +xfs_log_inode( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error; + + xfs_iunlock(ip, XFS_ILOCK_SHARED); + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); + error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); + + if (error) { + xfs_trans_cancel(tp, 0); + /* we need to return with the lock hold shared */ + xfs_ilock(ip, XFS_ILOCK_SHARED); + return error; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + /* + * Note - it's possible that we might have pushed ourselves out of the + * way during trans_reserve which would flush the inode. But there's + * no guarantee that the inode buffer has actually gone out yet (it's + * delwri). Plus the buffer could be pinned anyway if it's part of + * an inode in another recent transaction. So we play it safe and + * fire off the transaction anyway. + */ + xfs_trans_ijoin(tp, ip); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + error = xfs_trans_commit(tp, 0); + xfs_ilock_demote(ip, XFS_ILOCK_EXCL); + + return error; +} + +STATIC int +xfs_fs_write_inode( + struct inode *inode, + struct writeback_control *wbc) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + int error = EAGAIN; + + trace_xfs_write_inode(ip); + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + if (wbc->sync_mode == WB_SYNC_ALL) { + /* + * Make sure the inode has made it it into the log. Instead + * of forcing it all the way to stable storage using a + * synchronous transaction we let the log force inside the + * ->sync_fs call do that for thus, which reduces the number + * of synchronous log foces dramatically. + */ + xfs_ioend_wait(ip); + xfs_ilock(ip, XFS_ILOCK_SHARED); + if (ip->i_update_core) { + error = xfs_log_inode(ip); + if (error) + goto out_unlock; + } + } else { + /* + * We make this non-blocking if the inode is contended, return + * EAGAIN to indicate to the caller that they did not succeed. + * This prevents the flush path from blocking on inodes inside + * another operation right now, they get caught later by + * xfs_sync. + */ + if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) + goto out; + + if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) + goto out_unlock; + + /* + * Now we have the flush lock and the inode is not pinned, we + * can check if the inode is really clean as we know that + * there are no pending transaction completions, it is not + * waiting on the delayed write queue and there is no IO in + * progress. + */ + if (xfs_inode_clean(ip)) { + xfs_ifunlock(ip); + error = 0; + goto out_unlock; + } + error = xfs_iflush(ip, SYNC_TRYLOCK); + } + + out_unlock: + xfs_iunlock(ip, XFS_ILOCK_SHARED); + out: + /* + * if we failed to write out the inode then mark + * it dirty again so we'll try again later. + */ + if (error) + xfs_mark_inode_dirty_sync(ip); + return -error; +} + +STATIC void +xfs_fs_evict_inode( + struct inode *inode) +{ + xfs_inode_t *ip = XFS_I(inode); + + trace_xfs_evict_inode(ip); + + truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); + XFS_STATS_INC(vn_rele); + XFS_STATS_INC(vn_remove); + XFS_STATS_DEC(vn_active); + + /* + * The iolock is used by the file system to coordinate reads, + * writes, and block truncates. Up to this point the lock + * protected concurrent accesses by users of the inode. But + * from here forward we're doing some final processing of the + * inode because we're done with it, and although we reuse the + * iolock for protection it is really a distinct lock class + * (in the lockdep sense) from before. To keep lockdep happy + * (and basically indicate what we are doing), we explicitly + * re-init the iolock here. + */ + ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); + mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); + lockdep_set_class_and_name(&ip->i_iolock.mr_lock, + &xfs_iolock_reclaimable, "xfs_iolock_reclaimable"); + + xfs_inactive(ip); +} + +STATIC void +xfs_free_fsname( + struct xfs_mount *mp) +{ + kfree(mp->m_fsname); + kfree(mp->m_rtname); + kfree(mp->m_logname); +} + +STATIC void +xfs_fs_put_super( + struct super_block *sb) +{ + struct xfs_mount *mp = XFS_M(sb); + + xfs_syncd_stop(mp); + + /* + * Blow away any referenced inode in the filestreams cache. + * This can and will cause log traffic as inodes go inactive + * here. + */ + xfs_filestream_unmount(mp); + + XFS_bflush(mp->m_ddev_targp); + + xfs_unmountfs(mp); + xfs_freesb(mp); + xfs_icsb_destroy_counters(mp); + xfs_close_devices(mp); + xfs_free_fsname(mp); + kfree(mp); +} + +STATIC int +xfs_fs_sync_fs( + struct super_block *sb, + int wait) +{ + struct xfs_mount *mp = XFS_M(sb); + int error; + + /* + * Not much we can do for the first async pass. Writing out the + * superblock would be counter-productive as we are going to redirty + * when writing out other data and metadata (and writing out a single + * block is quite fast anyway). + * + * Try to asynchronously kick off quota syncing at least. + */ + if (!wait) { + xfs_qm_sync(mp, SYNC_TRYLOCK); + return 0; + } + + error = xfs_quiesce_data(mp); + if (error) + return -error; + + if (laptop_mode) { + /* + * The disk must be active because we're syncing. + * We schedule xfssyncd now (now that the disk is + * active) instead of later (when it might not be). + */ + flush_delayed_work_sync(&mp->m_sync_work); + } + + return 0; +} + +STATIC int +xfs_fs_statfs( + struct dentry *dentry, + struct kstatfs *statp) +{ + struct xfs_mount *mp = XFS_M(dentry->d_sb); + xfs_sb_t *sbp = &mp->m_sb; + struct xfs_inode *ip = XFS_I(dentry->d_inode); + __uint64_t fakeinos, id; + xfs_extlen_t lsize; + __int64_t ffree; + + statp->f_type = XFS_SB_MAGIC; + statp->f_namelen = MAXNAMELEN - 1; + + id = huge_encode_dev(mp->m_ddev_targp->bt_dev); + statp->f_fsid.val[0] = (u32)id; + statp->f_fsid.val[1] = (u32)(id >> 32); + + xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); + + spin_lock(&mp->m_sb_lock); + statp->f_bsize = sbp->sb_blocksize; + lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; + statp->f_blocks = sbp->sb_dblocks - lsize; + statp->f_bfree = statp->f_bavail = + sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); + fakeinos = statp->f_bfree << sbp->sb_inopblog; + statp->f_files = + MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER); + if (mp->m_maxicount) + statp->f_files = min_t(typeof(statp->f_files), + statp->f_files, + mp->m_maxicount); + + /* make sure statp->f_ffree does not underflow */ + ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); + statp->f_ffree = max_t(__int64_t, ffree, 0); + + spin_unlock(&mp->m_sb_lock); + + if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || + ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == + (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) + xfs_qm_statvfs(ip, statp); + return 0; +} + +STATIC void +xfs_save_resvblks(struct xfs_mount *mp) +{ + __uint64_t resblks = 0; + + mp->m_resblks_save = mp->m_resblks; + xfs_reserve_blocks(mp, &resblks, NULL); +} + +STATIC void +xfs_restore_resvblks(struct xfs_mount *mp) +{ + __uint64_t resblks; + + if (mp->m_resblks_save) { + resblks = mp->m_resblks_save; + mp->m_resblks_save = 0; + } else + resblks = xfs_default_resblks(mp); + + xfs_reserve_blocks(mp, &resblks, NULL); +} + +STATIC int +xfs_fs_remount( + struct super_block *sb, + int *flags, + char *options) +{ + struct xfs_mount *mp = XFS_M(sb); + substring_t args[MAX_OPT_ARGS]; + char *p; + int error; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_barrier: + mp->m_flags |= XFS_MOUNT_BARRIER; + break; + case Opt_nobarrier: + mp->m_flags &= ~XFS_MOUNT_BARRIER; + break; + default: + /* + * Logically we would return an error here to prevent + * users from believing they might have changed + * mount options using remount which can't be changed. + * + * But unfortunately mount(8) adds all options from + * mtab and fstab to the mount arguments in some cases + * so we can't blindly reject options, but have to + * check for each specified option if it actually + * differs from the currently set option and only + * reject it if that's the case. + * + * Until that is implemented we return success for + * every remount request, and silently ignore all + * options that we can't actually change. + */ +#if 0 + xfs_info(mp, + "mount option \"%s\" not supported for remount\n", p); + return -EINVAL; +#else + break; +#endif + } + } + + /* ro -> rw */ + if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { + mp->m_flags &= ~XFS_MOUNT_RDONLY; + + /* + * If this is the first remount to writeable state we + * might have some superblock changes to update. + */ + if (mp->m_update_flags) { + error = xfs_mount_log_sb(mp, mp->m_update_flags); + if (error) { + xfs_warn(mp, "failed to write sb changes"); + return error; + } + mp->m_update_flags = 0; + } + + /* + * Fill out the reserve pool if it is empty. Use the stashed + * value if it is non-zero, otherwise go with the default. + */ + xfs_restore_resvblks(mp); + } + + /* rw -> ro */ + if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { + /* + * After we have synced the data but before we sync the + * metadata, we need to free up the reserve block pool so that + * the used block count in the superblock on disk is correct at + * the end of the remount. Stash the current reserve pool size + * so that if we get remounted rw, we can return it to the same + * size. + */ + + xfs_quiesce_data(mp); + xfs_save_resvblks(mp); + xfs_quiesce_attr(mp); + mp->m_flags |= XFS_MOUNT_RDONLY; + } + + return 0; +} + +/* + * Second stage of a freeze. The data is already frozen so we only + * need to take care of the metadata. Once that's done write a dummy + * record to dirty the log in case of a crash while frozen. + */ +STATIC int +xfs_fs_freeze( + struct super_block *sb) +{ + struct xfs_mount *mp = XFS_M(sb); + + xfs_save_resvblks(mp); + xfs_quiesce_attr(mp); + return -xfs_fs_log_dummy(mp); +} + +STATIC int +xfs_fs_unfreeze( + struct super_block *sb) +{ + struct xfs_mount *mp = XFS_M(sb); + + xfs_restore_resvblks(mp); + return 0; +} + +STATIC int +xfs_fs_show_options( + struct seq_file *m, + struct vfsmount *mnt) +{ + return -xfs_showargs(XFS_M(mnt->mnt_sb), m); +} + +/* + * This function fills in xfs_mount_t fields based on mount args. + * Note: the superblock _has_ now been read in. + */ +STATIC int +xfs_finish_flags( + struct xfs_mount *mp) +{ + int ronly = (mp->m_flags & XFS_MOUNT_RDONLY); + + /* Fail a mount where the logbuf is smaller than the log stripe */ + if (xfs_sb_version_haslogv2(&mp->m_sb)) { + if (mp->m_logbsize <= 0 && + mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) { + mp->m_logbsize = mp->m_sb.sb_logsunit; + } else if (mp->m_logbsize > 0 && + mp->m_logbsize < mp->m_sb.sb_logsunit) { + xfs_warn(mp, + "logbuf size must be greater than or equal to log stripe size"); + return XFS_ERROR(EINVAL); + } + } else { + /* Fail a mount if the logbuf is larger than 32K */ + if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { + xfs_warn(mp, + "logbuf size for version 1 logs must be 16K or 32K"); + return XFS_ERROR(EINVAL); + } + } + + /* + * mkfs'ed attr2 will turn on attr2 mount unless explicitly + * told by noattr2 to turn it off + */ + if (xfs_sb_version_hasattr2(&mp->m_sb) && + !(mp->m_flags & XFS_MOUNT_NOATTR2)) + mp->m_flags |= XFS_MOUNT_ATTR2; + + /* + * prohibit r/w mounts of read-only filesystems + */ + if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { + xfs_warn(mp, + "cannot mount a read-only filesystem as read-write"); + return XFS_ERROR(EROFS); + } + + return 0; +} + +STATIC int +xfs_fs_fill_super( + struct super_block *sb, + void *data, + int silent) +{ + struct inode *root; + struct xfs_mount *mp = NULL; + int flags = 0, error = ENOMEM; + + mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); + if (!mp) + goto out; + + spin_lock_init(&mp->m_sb_lock); + mutex_init(&mp->m_growlock); + atomic_set(&mp->m_active_trans, 0); + + mp->m_super = sb; + sb->s_fs_info = mp; + + error = xfs_parseargs(mp, (char *)data); + if (error) + goto out_free_fsname; + + sb_min_blocksize(sb, BBSIZE); + sb->s_xattr = xfs_xattr_handlers; + sb->s_export_op = &xfs_export_operations; +#ifdef CONFIG_XFS_QUOTA + sb->s_qcop = &xfs_quotactl_operations; +#endif + sb->s_op = &xfs_super_operations; + + if (silent) + flags |= XFS_MFSI_QUIET; + + error = xfs_open_devices(mp); + if (error) + goto out_free_fsname; + + error = xfs_icsb_init_counters(mp); + if (error) + goto out_close_devices; + + error = xfs_readsb(mp, flags); + if (error) + goto out_destroy_counters; + + error = xfs_finish_flags(mp); + if (error) + goto out_free_sb; + + error = xfs_setup_devices(mp); + if (error) + goto out_free_sb; + + error = xfs_filestream_mount(mp); + if (error) + goto out_free_sb; + + /* + * we must configure the block size in the superblock before we run the + * full mount process as the mount process can lookup and cache inodes. + * For the same reason we must also initialise the syncd and register + * the inode cache shrinker so that inodes can be reclaimed during + * operations like a quotacheck that iterate all inodes in the + * filesystem. + */ + sb->s_magic = XFS_SB_MAGIC; + sb->s_blocksize = mp->m_sb.sb_blocksize; + sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; + sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); + sb->s_time_gran = 1; + set_posix_acl_flag(sb); + + error = xfs_mountfs(mp); + if (error) + goto out_filestream_unmount; + + error = xfs_syncd_init(mp); + if (error) + goto out_unmount; + + root = igrab(VFS_I(mp->m_rootip)); + if (!root) { + error = ENOENT; + goto out_syncd_stop; + } + if (is_bad_inode(root)) { + error = EINVAL; + goto out_syncd_stop; + } + sb->s_root = d_alloc_root(root); + if (!sb->s_root) { + error = ENOMEM; + goto out_iput; + } + + return 0; + + out_filestream_unmount: + xfs_filestream_unmount(mp); + out_free_sb: + xfs_freesb(mp); + out_destroy_counters: + xfs_icsb_destroy_counters(mp); + out_close_devices: + xfs_close_devices(mp); + out_free_fsname: + xfs_free_fsname(mp); + kfree(mp); + out: + return -error; + + out_iput: + iput(root); + out_syncd_stop: + xfs_syncd_stop(mp); + out_unmount: + /* + * Blow away any referenced inode in the filestreams cache. + * This can and will cause log traffic as inodes go inactive + * here. + */ + xfs_filestream_unmount(mp); + + XFS_bflush(mp->m_ddev_targp); + + xfs_unmountfs(mp); + goto out_free_sb; +} + +STATIC struct dentry * +xfs_fs_mount( + struct file_system_type *fs_type, + int flags, + const char *dev_name, + void *data) +{ + return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); +} + +static int +xfs_fs_nr_cached_objects( + struct super_block *sb) +{ + return xfs_reclaim_inodes_count(XFS_M(sb)); +} + +static void +xfs_fs_free_cached_objects( + struct super_block *sb, + int nr_to_scan) +{ + xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan); +} + +static const struct super_operations xfs_super_operations = { + .alloc_inode = xfs_fs_alloc_inode, + .destroy_inode = xfs_fs_destroy_inode, + .dirty_inode = xfs_fs_dirty_inode, + .write_inode = xfs_fs_write_inode, + .evict_inode = xfs_fs_evict_inode, + .put_super = xfs_fs_put_super, + .sync_fs = xfs_fs_sync_fs, + .freeze_fs = xfs_fs_freeze, + .unfreeze_fs = xfs_fs_unfreeze, + .statfs = xfs_fs_statfs, + .remount_fs = xfs_fs_remount, + .show_options = xfs_fs_show_options, + .nr_cached_objects = xfs_fs_nr_cached_objects, + .free_cached_objects = xfs_fs_free_cached_objects, +}; + +static struct file_system_type xfs_fs_type = { + .owner = THIS_MODULE, + .name = "xfs", + .mount = xfs_fs_mount, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +STATIC int __init +xfs_init_zones(void) +{ + + xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend"); + if (!xfs_ioend_zone) + goto out; + + xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE, + xfs_ioend_zone); + if (!xfs_ioend_pool) + goto out_destroy_ioend_zone; + + xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t), + "xfs_log_ticket"); + if (!xfs_log_ticket_zone) + goto out_destroy_ioend_pool; + + xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t), + "xfs_bmap_free_item"); + if (!xfs_bmap_free_item_zone) + goto out_destroy_log_ticket_zone; + + xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), + "xfs_btree_cur"); + if (!xfs_btree_cur_zone) + goto out_destroy_bmap_free_item_zone; + + xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t), + "xfs_da_state"); + if (!xfs_da_state_zone) + goto out_destroy_btree_cur_zone; + + xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); + if (!xfs_dabuf_zone) + goto out_destroy_da_state_zone; + + xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); + if (!xfs_ifork_zone) + goto out_destroy_dabuf_zone; + + xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); + if (!xfs_trans_zone) + goto out_destroy_ifork_zone; + + xfs_log_item_desc_zone = + kmem_zone_init(sizeof(struct xfs_log_item_desc), + "xfs_log_item_desc"); + if (!xfs_log_item_desc_zone) + goto out_destroy_trans_zone; + + /* + * The size of the zone allocated buf log item is the maximum + * size possible under XFS. This wastes a little bit of memory, + * but it is much faster. + */ + xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) + + (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / + NBWORD) * sizeof(int))), "xfs_buf_item"); + if (!xfs_buf_item_zone) + goto out_destroy_log_item_desc_zone; + + xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) + + ((XFS_EFD_MAX_FAST_EXTENTS - 1) * + sizeof(xfs_extent_t))), "xfs_efd_item"); + if (!xfs_efd_zone) + goto out_destroy_buf_item_zone; + + xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) + + ((XFS_EFI_MAX_FAST_EXTENTS - 1) * + sizeof(xfs_extent_t))), "xfs_efi_item"); + if (!xfs_efi_zone) + goto out_destroy_efd_zone; + + xfs_inode_zone = + kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", + KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD, + xfs_fs_inode_init_once); + if (!xfs_inode_zone) + goto out_destroy_efi_zone; + + xfs_ili_zone = + kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", + KM_ZONE_SPREAD, NULL); + if (!xfs_ili_zone) + goto out_destroy_inode_zone; + + return 0; + + out_destroy_inode_zone: + kmem_zone_destroy(xfs_inode_zone); + out_destroy_efi_zone: + kmem_zone_destroy(xfs_efi_zone); + out_destroy_efd_zone: + kmem_zone_destroy(xfs_efd_zone); + out_destroy_buf_item_zone: + kmem_zone_destroy(xfs_buf_item_zone); + out_destroy_log_item_desc_zone: + kmem_zone_destroy(xfs_log_item_desc_zone); + out_destroy_trans_zone: + kmem_zone_destroy(xfs_trans_zone); + out_destroy_ifork_zone: + kmem_zone_destroy(xfs_ifork_zone); + out_destroy_dabuf_zone: + kmem_zone_destroy(xfs_dabuf_zone); + out_destroy_da_state_zone: + kmem_zone_destroy(xfs_da_state_zone); + out_destroy_btree_cur_zone: + kmem_zone_destroy(xfs_btree_cur_zone); + out_destroy_bmap_free_item_zone: + kmem_zone_destroy(xfs_bmap_free_item_zone); + out_destroy_log_ticket_zone: + kmem_zone_destroy(xfs_log_ticket_zone); + out_destroy_ioend_pool: + mempool_destroy(xfs_ioend_pool); + out_destroy_ioend_zone: + kmem_zone_destroy(xfs_ioend_zone); + out: + return -ENOMEM; +} + +STATIC void +xfs_destroy_zones(void) +{ + kmem_zone_destroy(xfs_ili_zone); + kmem_zone_destroy(xfs_inode_zone); + kmem_zone_destroy(xfs_efi_zone); + kmem_zone_destroy(xfs_efd_zone); + kmem_zone_destroy(xfs_buf_item_zone); + kmem_zone_destroy(xfs_log_item_desc_zone); + kmem_zone_destroy(xfs_trans_zone); + kmem_zone_destroy(xfs_ifork_zone); + kmem_zone_destroy(xfs_dabuf_zone); + kmem_zone_destroy(xfs_da_state_zone); + kmem_zone_destroy(xfs_btree_cur_zone); + kmem_zone_destroy(xfs_bmap_free_item_zone); + kmem_zone_destroy(xfs_log_ticket_zone); + mempool_destroy(xfs_ioend_pool); + kmem_zone_destroy(xfs_ioend_zone); + +} + +STATIC int __init +xfs_init_workqueues(void) +{ + /* + * max_active is set to 8 to give enough concurency to allow + * multiple work operations on each CPU to run. This allows multiple + * filesystems to be running sync work concurrently, and scales with + * the number of CPUs in the system. + */ + xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); + if (!xfs_syncd_wq) + goto out; + + xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8); + if (!xfs_ail_wq) + goto out_destroy_syncd; + + return 0; + +out_destroy_syncd: + destroy_workqueue(xfs_syncd_wq); +out: + return -ENOMEM; +} + +STATIC void +xfs_destroy_workqueues(void) +{ + destroy_workqueue(xfs_ail_wq); + destroy_workqueue(xfs_syncd_wq); +} + +STATIC int __init +init_xfs_fs(void) +{ + int error; + + printk(KERN_INFO XFS_VERSION_STRING " with " + XFS_BUILD_OPTIONS " enabled\n"); + + xfs_ioend_init(); + xfs_dir_startup(); + + error = xfs_init_zones(); + if (error) + goto out; + + error = xfs_init_workqueues(); + if (error) + goto out_destroy_zones; + + error = xfs_mru_cache_init(); + if (error) + goto out_destroy_wq; + + error = xfs_filestream_init(); + if (error) + goto out_mru_cache_uninit; + + error = xfs_buf_init(); + if (error) + goto out_filestream_uninit; + + error = xfs_init_procfs(); + if (error) + goto out_buf_terminate; + + error = xfs_sysctl_register(); + if (error) + goto out_cleanup_procfs; + + vfs_initquota(); + + error = register_filesystem(&xfs_fs_type); + if (error) + goto out_sysctl_unregister; + return 0; + + out_sysctl_unregister: + xfs_sysctl_unregister(); + out_cleanup_procfs: + xfs_cleanup_procfs(); + out_buf_terminate: + xfs_buf_terminate(); + out_filestream_uninit: + xfs_filestream_uninit(); + out_mru_cache_uninit: + xfs_mru_cache_uninit(); + out_destroy_wq: + xfs_destroy_workqueues(); + out_destroy_zones: + xfs_destroy_zones(); + out: + return error; +} + +STATIC void __exit +exit_xfs_fs(void) +{ + vfs_exitquota(); + unregister_filesystem(&xfs_fs_type); + xfs_sysctl_unregister(); + xfs_cleanup_procfs(); + xfs_buf_terminate(); + xfs_filestream_uninit(); + xfs_mru_cache_uninit(); + xfs_destroy_workqueues(); + xfs_destroy_zones(); +} + +module_init(init_xfs_fs); +module_exit(exit_xfs_fs); + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled"); +MODULE_LICENSE("GPL"); diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h new file mode 100644 index 0000000..50a3266 --- /dev/null +++ b/fs/xfs/xfs_super.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_SUPER_H__ +#define __XFS_SUPER_H__ + +#include + +#ifdef CONFIG_XFS_QUOTA +extern void xfs_qm_init(void); +extern void xfs_qm_exit(void); +# define vfs_initquota() xfs_qm_init() +# define vfs_exitquota() xfs_qm_exit() +#else +# define vfs_initquota() do { } while (0) +# define vfs_exitquota() do { } while (0) +#endif + +#ifdef CONFIG_XFS_POSIX_ACL +# define XFS_ACL_STRING "ACLs, " +# define set_posix_acl_flag(sb) ((sb)->s_flags |= MS_POSIXACL) +#else +# define XFS_ACL_STRING +# define set_posix_acl_flag(sb) do { } while (0) +#endif + +#define XFS_SECURITY_STRING "security attributes, " + +#ifdef CONFIG_XFS_RT +# define XFS_REALTIME_STRING "realtime, " +#else +# define XFS_REALTIME_STRING +#endif + +#if XFS_BIG_BLKNOS +# if XFS_BIG_INUMS +# define XFS_BIGFS_STRING "large block/inode numbers, " +# else +# define XFS_BIGFS_STRING "large block numbers, " +# endif +#else +# define XFS_BIGFS_STRING +#endif + +#ifdef DEBUG +# define XFS_DBG_STRING "debug" +#else +# define XFS_DBG_STRING "no debug" +#endif + +#define XFS_VERSION_STRING "SGI XFS" +#define XFS_BUILD_OPTIONS XFS_ACL_STRING \ + XFS_SECURITY_STRING \ + XFS_REALTIME_STRING \ + XFS_BIGFS_STRING \ + XFS_DBG_STRING /* DBG must be last */ + +struct xfs_inode; +struct xfs_mount; +struct xfs_buftarg; +struct block_device; + +extern __uint64_t xfs_max_file_offset(unsigned int); + +extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); + +extern const struct export_operations xfs_export_operations; +extern const struct xattr_handler *xfs_xattr_handlers[]; +extern const struct quotactl_ops xfs_quotactl_operations; + +#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) + +#endif /* __XFS_SUPER_H__ */ diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c new file mode 100644 index 0000000..4604f90 --- /dev/null +++ b/fs/xfs/xfs_sync.c @@ -0,0 +1,1065 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_types.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_dinode.h" +#include "xfs_error.h" +#include "xfs_filestream.h" +#include "xfs_vnodeops.h" +#include "xfs_inode_item.h" +#include "xfs_quota.h" +#include "xfs_trace.h" +#include "xfs_fsops.h" + +#include +#include + +struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ + +/* + * The inode lookup is done in batches to keep the amount of lock traffic and + * radix tree lookups to a minimum. The batch size is a trade off between + * lookup reduction and stack usage. This is in the reclaim path, so we can't + * be too greedy. + */ +#define XFS_LOOKUP_BATCH 32 + +STATIC int +xfs_inode_ag_walk_grab( + struct xfs_inode *ip) +{ + struct inode *inode = VFS_I(ip); + + ASSERT(rcu_read_lock_held()); + + /* + * check for stale RCU freed inode + * + * If the inode has been reallocated, it doesn't matter if it's not in + * the AG we are walking - we are walking for writeback, so if it + * passes all the "valid inode" checks and is dirty, then we'll write + * it back anyway. If it has been reallocated and still being + * initialised, the XFS_INEW check below will catch it. + */ + spin_lock(&ip->i_flags_lock); + if (!ip->i_ino) + goto out_unlock_noent; + + /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ + if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) + goto out_unlock_noent; + spin_unlock(&ip->i_flags_lock); + + /* nothing to sync during shutdown */ + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return EFSCORRUPTED; + + /* If we can't grab the inode, it must on it's way to reclaim. */ + if (!igrab(inode)) + return ENOENT; + + if (is_bad_inode(inode)) { + IRELE(ip); + return ENOENT; + } + + /* inode is valid */ + return 0; + +out_unlock_noent: + spin_unlock(&ip->i_flags_lock); + return ENOENT; +} + +STATIC int +xfs_inode_ag_walk( + struct xfs_mount *mp, + struct xfs_perag *pag, + int (*execute)(struct xfs_inode *ip, + struct xfs_perag *pag, int flags), + int flags) +{ + uint32_t first_index; + int last_error = 0; + int skipped; + int done; + int nr_found; + +restart: + done = 0; + skipped = 0; + first_index = 0; + nr_found = 0; + do { + struct xfs_inode *batch[XFS_LOOKUP_BATCH]; + int error = 0; + int i; + + rcu_read_lock(); + nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, + (void **)batch, first_index, + XFS_LOOKUP_BATCH); + if (!nr_found) { + rcu_read_unlock(); + break; + } + + /* + * Grab the inodes before we drop the lock. if we found + * nothing, nr == 0 and the loop will be skipped. + */ + for (i = 0; i < nr_found; i++) { + struct xfs_inode *ip = batch[i]; + + if (done || xfs_inode_ag_walk_grab(ip)) + batch[i] = NULL; + + /* + * Update the index for the next lookup. Catch + * overflows into the next AG range which can occur if + * we have inodes in the last block of the AG and we + * are currently pointing to the last inode. + * + * Because we may see inodes that are from the wrong AG + * due to RCU freeing and reallocation, only update the + * index if it lies in this AG. It was a race that lead + * us to see this inode, so another lookup from the + * same index will not find it again. + */ + if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno) + continue; + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) + done = 1; + } + + /* unlock now we've grabbed the inodes. */ + rcu_read_unlock(); + + for (i = 0; i < nr_found; i++) { + if (!batch[i]) + continue; + error = execute(batch[i], pag, flags); + IRELE(batch[i]); + if (error == EAGAIN) { + skipped++; + continue; + } + if (error && last_error != EFSCORRUPTED) + last_error = error; + } + + /* bail out if the filesystem is corrupted. */ + if (error == EFSCORRUPTED) + break; + + cond_resched(); + + } while (nr_found && !done); + + if (skipped) { + delay(1); + goto restart; + } + return last_error; +} + +int +xfs_inode_ag_iterator( + struct xfs_mount *mp, + int (*execute)(struct xfs_inode *ip, + struct xfs_perag *pag, int flags), + int flags) +{ + struct xfs_perag *pag; + int error = 0; + int last_error = 0; + xfs_agnumber_t ag; + + ag = 0; + while ((pag = xfs_perag_get(mp, ag))) { + ag = pag->pag_agno + 1; + error = xfs_inode_ag_walk(mp, pag, execute, flags); + xfs_perag_put(pag); + if (error) { + last_error = error; + if (error == EFSCORRUPTED) + break; + } + } + return XFS_ERROR(last_error); +} + +STATIC int +xfs_sync_inode_data( + struct xfs_inode *ip, + struct xfs_perag *pag, + int flags) +{ + struct inode *inode = VFS_I(ip); + struct address_space *mapping = inode->i_mapping; + int error = 0; + + if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) + goto out_wait; + + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { + if (flags & SYNC_TRYLOCK) + goto out_wait; + xfs_ilock(ip, XFS_IOLOCK_SHARED); + } + + error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? + 0 : XBF_ASYNC, FI_NONE); + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + + out_wait: + if (flags & SYNC_WAIT) + xfs_ioend_wait(ip); + return error; +} + +STATIC int +xfs_sync_inode_attr( + struct xfs_inode *ip, + struct xfs_perag *pag, + int flags) +{ + int error = 0; + + xfs_ilock(ip, XFS_ILOCK_SHARED); + if (xfs_inode_clean(ip)) + goto out_unlock; + if (!xfs_iflock_nowait(ip)) { + if (!(flags & SYNC_WAIT)) + goto out_unlock; + xfs_iflock(ip); + } + + if (xfs_inode_clean(ip)) { + xfs_ifunlock(ip); + goto out_unlock; + } + + error = xfs_iflush(ip, flags); + + /* + * We don't want to try again on non-blocking flushes that can't run + * again immediately. If an inode really must be written, then that's + * what the SYNC_WAIT flag is for. + */ + if (error == EAGAIN) { + ASSERT(!(flags & SYNC_WAIT)); + error = 0; + } + + out_unlock: + xfs_iunlock(ip, XFS_ILOCK_SHARED); + return error; +} + +/* + * Write out pagecache data for the whole filesystem. + */ +STATIC int +xfs_sync_data( + struct xfs_mount *mp, + int flags) +{ + int error; + + ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); + + error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags); + if (error) + return XFS_ERROR(error); + + xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0); + return 0; +} + +/* + * Write out inode metadata (attributes) for the whole filesystem. + */ +STATIC int +xfs_sync_attr( + struct xfs_mount *mp, + int flags) +{ + ASSERT((flags & ~SYNC_WAIT) == 0); + + return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags); +} + +STATIC int +xfs_sync_fsdata( + struct xfs_mount *mp) +{ + struct xfs_buf *bp; + + /* + * If the buffer is pinned then push on the log so we won't get stuck + * waiting in the write for someone, maybe ourselves, to flush the log. + * + * Even though we just pushed the log above, we did not have the + * superblock buffer locked at that point so it can become pinned in + * between there and here. + */ + bp = xfs_getsb(mp, 0); + if (xfs_buf_ispinned(bp)) + xfs_log_force(mp, 0); + + return xfs_bwrite(mp, bp); +} + +/* + * When remounting a filesystem read-only or freezing the filesystem, we have + * two phases to execute. This first phase is syncing the data before we + * quiesce the filesystem, and the second is flushing all the inodes out after + * we've waited for all the transactions created by the first phase to + * complete. The second phase ensures that the inodes are written to their + * location on disk rather than just existing in transactions in the log. This + * means after a quiesce there is no log replay required to write the inodes to + * disk (this is the main difference between a sync and a quiesce). + */ +/* + * First stage of freeze - no writers will make progress now we are here, + * so we flush delwri and delalloc buffers here, then wait for all I/O to + * complete. Data is frozen at that point. Metadata is not frozen, + * transactions can still occur here so don't bother flushing the buftarg + * because it'll just get dirty again. + */ +int +xfs_quiesce_data( + struct xfs_mount *mp) +{ + int error, error2 = 0; + + xfs_qm_sync(mp, SYNC_TRYLOCK); + xfs_qm_sync(mp, SYNC_WAIT); + + /* force out the newly dirtied log buffers */ + xfs_log_force(mp, XFS_LOG_SYNC); + + /* write superblock and hoover up shutdown errors */ + error = xfs_sync_fsdata(mp); + + /* make sure all delwri buffers are written out */ + xfs_flush_buftarg(mp->m_ddev_targp, 1); + + /* mark the log as covered if needed */ + if (xfs_log_need_covered(mp)) + error2 = xfs_fs_log_dummy(mp); + + /* flush data-only devices */ + if (mp->m_rtdev_targp) + XFS_bflush(mp->m_rtdev_targp); + + return error ? error : error2; +} + +STATIC void +xfs_quiesce_fs( + struct xfs_mount *mp) +{ + int count = 0, pincount; + + xfs_reclaim_inodes(mp, 0); + xfs_flush_buftarg(mp->m_ddev_targp, 0); + + /* + * This loop must run at least twice. The first instance of the loop + * will flush most meta data but that will generate more meta data + * (typically directory updates). Which then must be flushed and + * logged before we can write the unmount record. We also so sync + * reclaim of inodes to catch any that the above delwri flush skipped. + */ + do { + xfs_reclaim_inodes(mp, SYNC_WAIT); + xfs_sync_attr(mp, SYNC_WAIT); + pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); + if (!pincount) { + delay(50); + count++; + } + } while (count < 2); +} + +/* + * Second stage of a quiesce. The data is already synced, now we have to take + * care of the metadata. New transactions are already blocked, so we need to + * wait for any remaining transactions to drain out before proceeding. + */ +void +xfs_quiesce_attr( + struct xfs_mount *mp) +{ + int error = 0; + + /* wait for all modifications to complete */ + while (atomic_read(&mp->m_active_trans) > 0) + delay(100); + + /* flush inodes and push all remaining buffers out to disk */ + xfs_quiesce_fs(mp); + + /* + * Just warn here till VFS can correctly support + * read-only remount without racing. + */ + WARN_ON(atomic_read(&mp->m_active_trans) != 0); + + /* Push the superblock and write an unmount record */ + error = xfs_log_sbcount(mp); + if (error) + xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " + "Frozen image may not be consistent."); + xfs_log_unmount_write(mp); + xfs_unmountfs_writesb(mp); +} + +static void +xfs_syncd_queue_sync( + struct xfs_mount *mp) +{ + queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work, + msecs_to_jiffies(xfs_syncd_centisecs * 10)); +} + +/* + * Every sync period we need to unpin all items, reclaim inodes and sync + * disk quotas. We might need to cover the log to indicate that the + * filesystem is idle and not frozen. + */ +STATIC void +xfs_sync_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(to_delayed_work(work), + struct xfs_mount, m_sync_work); + int error; + + if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { + /* dgc: errors ignored here */ + if (mp->m_super->s_frozen == SB_UNFROZEN && + xfs_log_need_covered(mp)) + error = xfs_fs_log_dummy(mp); + else + xfs_log_force(mp, 0); + error = xfs_qm_sync(mp, SYNC_TRYLOCK); + + /* start pushing all the metadata that is currently dirty */ + xfs_ail_push_all(mp->m_ail); + } + + /* queue us up again */ + xfs_syncd_queue_sync(mp); +} + +/* + * Queue a new inode reclaim pass if there are reclaimable inodes and there + * isn't a reclaim pass already in progress. By default it runs every 5s based + * on the xfs syncd work default of 30s. Perhaps this should have it's own + * tunable, but that can be done if this method proves to be ineffective or too + * aggressive. + */ +static void +xfs_syncd_queue_reclaim( + struct xfs_mount *mp) +{ + + /* + * We can have inodes enter reclaim after we've shut down the syncd + * workqueue during unmount, so don't allow reclaim work to be queued + * during unmount. + */ + if (!(mp->m_super->s_flags & MS_ACTIVE)) + return; + + rcu_read_lock(); + if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { + queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work, + msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); + } + rcu_read_unlock(); +} + +/* + * This is a fast pass over the inode cache to try to get reclaim moving on as + * many inodes as possible in a short period of time. It kicks itself every few + * seconds, as well as being kicked by the inode cache shrinker when memory + * goes low. It scans as quickly as possible avoiding locked inodes or those + * already being flushed, and once done schedules a future pass. + */ +STATIC void +xfs_reclaim_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(to_delayed_work(work), + struct xfs_mount, m_reclaim_work); + + xfs_reclaim_inodes(mp, SYNC_TRYLOCK); + xfs_syncd_queue_reclaim(mp); +} + +/* + * Flush delayed allocate data, attempting to free up reserved space + * from existing allocations. At this point a new allocation attempt + * has failed with ENOSPC and we are in the process of scratching our + * heads, looking about for more room. + * + * Queue a new data flush if there isn't one already in progress and + * wait for completion of the flush. This means that we only ever have one + * inode flush in progress no matter how many ENOSPC events are occurring and + * so will prevent the system from bogging down due to every concurrent + * ENOSPC event scanning all the active inodes in the system for writeback. + */ +void +xfs_flush_inodes( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + + queue_work(xfs_syncd_wq, &mp->m_flush_work); + flush_work_sync(&mp->m_flush_work); +} + +STATIC void +xfs_flush_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(work, + struct xfs_mount, m_flush_work); + + xfs_sync_data(mp, SYNC_TRYLOCK); + xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); +} + +int +xfs_syncd_init( + struct xfs_mount *mp) +{ + INIT_WORK(&mp->m_flush_work, xfs_flush_worker); + INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker); + INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); + + xfs_syncd_queue_sync(mp); + xfs_syncd_queue_reclaim(mp); + + return 0; +} + +void +xfs_syncd_stop( + struct xfs_mount *mp) +{ + cancel_delayed_work_sync(&mp->m_sync_work); + cancel_delayed_work_sync(&mp->m_reclaim_work); + cancel_work_sync(&mp->m_flush_work); +} + +void +__xfs_inode_set_reclaim_tag( + struct xfs_perag *pag, + struct xfs_inode *ip) +{ + radix_tree_tag_set(&pag->pag_ici_root, + XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), + XFS_ICI_RECLAIM_TAG); + + if (!pag->pag_ici_reclaimable) { + /* propagate the reclaim tag up into the perag radix tree */ + spin_lock(&ip->i_mount->m_perag_lock); + radix_tree_tag_set(&ip->i_mount->m_perag_tree, + XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), + XFS_ICI_RECLAIM_TAG); + spin_unlock(&ip->i_mount->m_perag_lock); + + /* schedule periodic background inode reclaim */ + xfs_syncd_queue_reclaim(ip->i_mount); + + trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, + -1, _RET_IP_); + } + pag->pag_ici_reclaimable++; +} + +/* + * We set the inode flag atomically with the radix tree tag. + * Once we get tag lookups on the radix tree, this inode flag + * can go away. + */ +void +xfs_inode_set_reclaim_tag( + xfs_inode_t *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_perag *pag; + + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); + spin_lock(&pag->pag_ici_lock); + spin_lock(&ip->i_flags_lock); + __xfs_inode_set_reclaim_tag(pag, ip); + __xfs_iflags_set(ip, XFS_IRECLAIMABLE); + spin_unlock(&ip->i_flags_lock); + spin_unlock(&pag->pag_ici_lock); + xfs_perag_put(pag); +} + +STATIC void +__xfs_inode_clear_reclaim( + xfs_perag_t *pag, + xfs_inode_t *ip) +{ + pag->pag_ici_reclaimable--; + if (!pag->pag_ici_reclaimable) { + /* clear the reclaim tag from the perag radix tree */ + spin_lock(&ip->i_mount->m_perag_lock); + radix_tree_tag_clear(&ip->i_mount->m_perag_tree, + XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), + XFS_ICI_RECLAIM_TAG); + spin_unlock(&ip->i_mount->m_perag_lock); + trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno, + -1, _RET_IP_); + } +} + +void +__xfs_inode_clear_reclaim_tag( + xfs_mount_t *mp, + xfs_perag_t *pag, + xfs_inode_t *ip) +{ + radix_tree_tag_clear(&pag->pag_ici_root, + XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); + __xfs_inode_clear_reclaim(pag, ip); +} + +/* + * Grab the inode for reclaim exclusively. + * Return 0 if we grabbed it, non-zero otherwise. + */ +STATIC int +xfs_reclaim_inode_grab( + struct xfs_inode *ip, + int flags) +{ + ASSERT(rcu_read_lock_held()); + + /* quick check for stale RCU freed inode */ + if (!ip->i_ino) + return 1; + + /* + * do some unlocked checks first to avoid unnecessary lock traffic. + * The first is a flush lock check, the second is a already in reclaim + * check. Only do these checks if we are not going to block on locks. + */ + if ((flags & SYNC_TRYLOCK) && + (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) { + return 1; + } + + /* + * The radix tree lock here protects a thread in xfs_iget from racing + * with us starting reclaim on the inode. Once we have the + * XFS_IRECLAIM flag set it will not touch us. + * + * Due to RCU lookup, we may find inodes that have been freed and only + * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that + * aren't candidates for reclaim at all, so we must check the + * XFS_IRECLAIMABLE is set first before proceeding to reclaim. + */ + spin_lock(&ip->i_flags_lock); + if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) || + __xfs_iflags_test(ip, XFS_IRECLAIM)) { + /* not a reclaim candidate. */ + spin_unlock(&ip->i_flags_lock); + return 1; + } + __xfs_iflags_set(ip, XFS_IRECLAIM); + spin_unlock(&ip->i_flags_lock); + return 0; +} + +/* + * Inodes in different states need to be treated differently, and the return + * value of xfs_iflush is not sufficient to get this right. The following table + * lists the inode states and the reclaim actions necessary for non-blocking + * reclaim: + * + * + * inode state iflush ret required action + * --------------- ---------- --------------- + * bad - reclaim + * shutdown EIO unpin and reclaim + * clean, unpinned 0 reclaim + * stale, unpinned 0 reclaim + * clean, pinned(*) 0 requeue + * stale, pinned EAGAIN requeue + * dirty, delwri ok 0 requeue + * dirty, delwri blocked EAGAIN requeue + * dirty, sync flush 0 reclaim + * + * (*) dgc: I don't think the clean, pinned state is possible but it gets + * handled anyway given the order of checks implemented. + * + * As can be seen from the table, the return value of xfs_iflush() is not + * sufficient to correctly decide the reclaim action here. The checks in + * xfs_iflush() might look like duplicates, but they are not. + * + * Also, because we get the flush lock first, we know that any inode that has + * been flushed delwri has had the flush completed by the time we check that + * the inode is clean. The clean inode check needs to be done before flushing + * the inode delwri otherwise we would loop forever requeuing clean inodes as + * we cannot tell apart a successful delwri flush and a clean inode from the + * return value of xfs_iflush(). + * + * Note that because the inode is flushed delayed write by background + * writeback, the flush lock may already be held here and waiting on it can + * result in very long latencies. Hence for sync reclaims, where we wait on the + * flush lock, the caller should push out delayed write inodes first before + * trying to reclaim them to minimise the amount of time spent waiting. For + * background relaim, we just requeue the inode for the next pass. + * + * Hence the order of actions after gaining the locks should be: + * bad => reclaim + * shutdown => unpin and reclaim + * pinned, delwri => requeue + * pinned, sync => unpin + * stale => reclaim + * clean => reclaim + * dirty, delwri => flush and requeue + * dirty, sync => flush, wait and reclaim + */ +STATIC int +xfs_reclaim_inode( + struct xfs_inode *ip, + struct xfs_perag *pag, + int sync_mode) +{ + int error; + +restart: + error = 0; + xfs_ilock(ip, XFS_ILOCK_EXCL); + if (!xfs_iflock_nowait(ip)) { + if (!(sync_mode & SYNC_WAIT)) + goto out; + xfs_iflock(ip); + } + + if (is_bad_inode(VFS_I(ip))) + goto reclaim; + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + xfs_iunpin_wait(ip); + goto reclaim; + } + if (xfs_ipincount(ip)) { + if (!(sync_mode & SYNC_WAIT)) { + xfs_ifunlock(ip); + goto out; + } + xfs_iunpin_wait(ip); + } + if (xfs_iflags_test(ip, XFS_ISTALE)) + goto reclaim; + if (xfs_inode_clean(ip)) + goto reclaim; + + /* + * Now we have an inode that needs flushing. + * + * We do a nonblocking flush here even if we are doing a SYNC_WAIT + * reclaim as we can deadlock with inode cluster removal. + * xfs_ifree_cluster() can lock the inode buffer before it locks the + * ip->i_lock, and we are doing the exact opposite here. As a result, + * doing a blocking xfs_itobp() to get the cluster buffer will result + * in an ABBA deadlock with xfs_ifree_cluster(). + * + * As xfs_ifree_cluser() must gather all inodes that are active in the + * cache to mark them stale, if we hit this case we don't actually want + * to do IO here - we want the inode marked stale so we can simply + * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush, + * just unlock the inode, back off and try again. Hopefully the next + * pass through will see the stale flag set on the inode. + */ + error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode); + if (sync_mode & SYNC_WAIT) { + if (error == EAGAIN) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + /* backoff longer than in xfs_ifree_cluster */ + delay(2); + goto restart; + } + xfs_iflock(ip); + goto reclaim; + } + + /* + * When we have to flush an inode but don't have SYNC_WAIT set, we + * flush the inode out using a delwri buffer and wait for the next + * call into reclaim to find it in a clean state instead of waiting for + * it now. We also don't return errors here - if the error is transient + * then the next reclaim pass will flush the inode, and if the error + * is permanent then the next sync reclaim will reclaim the inode and + * pass on the error. + */ + if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { + xfs_warn(ip->i_mount, + "inode 0x%llx background reclaim flush failed with %d", + (long long)ip->i_ino, error); + } +out: + xfs_iflags_clear(ip, XFS_IRECLAIM); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + /* + * We could return EAGAIN here to make reclaim rescan the inode tree in + * a short while. However, this just burns CPU time scanning the tree + * waiting for IO to complete and xfssyncd never goes back to the idle + * state. Instead, return 0 to let the next scheduled background reclaim + * attempt to reclaim the inode again. + */ + return 0; + +reclaim: + xfs_ifunlock(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + XFS_STATS_INC(xs_ig_reclaims); + /* + * Remove the inode from the per-AG radix tree. + * + * Because radix_tree_delete won't complain even if the item was never + * added to the tree assert that it's been there before to catch + * problems with the inode life time early on. + */ + spin_lock(&pag->pag_ici_lock); + if (!radix_tree_delete(&pag->pag_ici_root, + XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) + ASSERT(0); + __xfs_inode_clear_reclaim(pag, ip); + spin_unlock(&pag->pag_ici_lock); + + /* + * Here we do an (almost) spurious inode lock in order to coordinate + * with inode cache radix tree lookups. This is because the lookup + * can reference the inodes in the cache without taking references. + * + * We make that OK here by ensuring that we wait until the inode is + * unlocked after the lookup before we go ahead and free it. We get + * both the ilock and the iolock because the code may need to drop the + * ilock one but will still hold the iolock. + */ + xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + xfs_qm_dqdetach(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + + xfs_inode_free(ip); + return error; + +} + +/* + * Walk the AGs and reclaim the inodes in them. Even if the filesystem is + * corrupted, we still want to try to reclaim all the inodes. If we don't, + * then a shut down during filesystem unmount reclaim walk leak all the + * unreclaimed inodes. + */ +int +xfs_reclaim_inodes_ag( + struct xfs_mount *mp, + int flags, + int *nr_to_scan) +{ + struct xfs_perag *pag; + int error = 0; + int last_error = 0; + xfs_agnumber_t ag; + int trylock = flags & SYNC_TRYLOCK; + int skipped; + +restart: + ag = 0; + skipped = 0; + while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { + unsigned long first_index = 0; + int done = 0; + int nr_found = 0; + + ag = pag->pag_agno + 1; + + if (trylock) { + if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) { + skipped++; + xfs_perag_put(pag); + continue; + } + first_index = pag->pag_ici_reclaim_cursor; + } else + mutex_lock(&pag->pag_ici_reclaim_lock); + + do { + struct xfs_inode *batch[XFS_LOOKUP_BATCH]; + int i; + + rcu_read_lock(); + nr_found = radix_tree_gang_lookup_tag( + &pag->pag_ici_root, + (void **)batch, first_index, + XFS_LOOKUP_BATCH, + XFS_ICI_RECLAIM_TAG); + if (!nr_found) { + done = 1; + rcu_read_unlock(); + break; + } + + /* + * Grab the inodes before we drop the lock. if we found + * nothing, nr == 0 and the loop will be skipped. + */ + for (i = 0; i < nr_found; i++) { + struct xfs_inode *ip = batch[i]; + + if (done || xfs_reclaim_inode_grab(ip, flags)) + batch[i] = NULL; + + /* + * Update the index for the next lookup. Catch + * overflows into the next AG range which can + * occur if we have inodes in the last block of + * the AG and we are currently pointing to the + * last inode. + * + * Because we may see inodes that are from the + * wrong AG due to RCU freeing and + * reallocation, only update the index if it + * lies in this AG. It was a race that lead us + * to see this inode, so another lookup from + * the same index will not find it again. + */ + if (XFS_INO_TO_AGNO(mp, ip->i_ino) != + pag->pag_agno) + continue; + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) + done = 1; + } + + /* unlock now we've grabbed the inodes. */ + rcu_read_unlock(); + + for (i = 0; i < nr_found; i++) { + if (!batch[i]) + continue; + error = xfs_reclaim_inode(batch[i], pag, flags); + if (error && last_error != EFSCORRUPTED) + last_error = error; + } + + *nr_to_scan -= XFS_LOOKUP_BATCH; + + cond_resched(); + + } while (nr_found && !done && *nr_to_scan > 0); + + if (trylock && !done) + pag->pag_ici_reclaim_cursor = first_index; + else + pag->pag_ici_reclaim_cursor = 0; + mutex_unlock(&pag->pag_ici_reclaim_lock); + xfs_perag_put(pag); + } + + /* + * if we skipped any AG, and we still have scan count remaining, do + * another pass this time using blocking reclaim semantics (i.e + * waiting on the reclaim locks and ignoring the reclaim cursors). This + * ensure that when we get more reclaimers than AGs we block rather + * than spin trying to execute reclaim. + */ + if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) { + trylock = 0; + goto restart; + } + return XFS_ERROR(last_error); +} + +int +xfs_reclaim_inodes( + xfs_mount_t *mp, + int mode) +{ + int nr_to_scan = INT_MAX; + + return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan); +} + +/* + * Scan a certain number of inodes for reclaim. + * + * When called we make sure that there is a background (fast) inode reclaim in + * progress, while we will throttle the speed of reclaim via doing synchronous + * reclaim of inodes. That means if we come across dirty inodes, we wait for + * them to be cleaned, which we hope will not be very long due to the + * background walker having already kicked the IO off on those dirty inodes. + */ +void +xfs_reclaim_inodes_nr( + struct xfs_mount *mp, + int nr_to_scan) +{ + /* kick background reclaimer and push the AIL */ + xfs_syncd_queue_reclaim(mp); + xfs_ail_push_all(mp->m_ail); + + xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); +} + +/* + * Return the number of reclaimable inodes in the filesystem for + * the shrinker to determine how much to reclaim. + */ +int +xfs_reclaim_inodes_count( + struct xfs_mount *mp) +{ + struct xfs_perag *pag; + xfs_agnumber_t ag = 0; + int reclaimable = 0; + + while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { + ag = pag->pag_agno + 1; + reclaimable += pag->pag_ici_reclaimable; + xfs_perag_put(pag); + } + return reclaimable; +} + diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h new file mode 100644 index 0000000..941202e --- /dev/null +++ b/fs/xfs/xfs_sync.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef XFS_SYNC_H +#define XFS_SYNC_H 1 + +struct xfs_mount; +struct xfs_perag; + +#define SYNC_WAIT 0x0001 /* wait for i/o to complete */ +#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ + +extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ + +int xfs_syncd_init(struct xfs_mount *mp); +void xfs_syncd_stop(struct xfs_mount *mp); + +int xfs_quiesce_data(struct xfs_mount *mp); +void xfs_quiesce_attr(struct xfs_mount *mp); + +void xfs_flush_inodes(struct xfs_inode *ip); + +int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); +int xfs_reclaim_inodes_count(struct xfs_mount *mp); +void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); + +void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); +void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); +void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, + struct xfs_inode *ip); + +int xfs_sync_inode_grab(struct xfs_inode *ip); +int xfs_inode_ag_iterator(struct xfs_mount *mp, + int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), + int flags); + +#endif diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c new file mode 100644 index 0000000..ee2d2ad --- /dev/null +++ b/fs/xfs/xfs_sysctl.c @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2001-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include +#include +#include "xfs_error.h" + +static struct ctl_table_header *xfs_table_header; + +#ifdef CONFIG_PROC_FS +STATIC int +xfs_stats_clear_proc_handler( + ctl_table *ctl, + int write, + void __user *buffer, + size_t *lenp, + loff_t *ppos) +{ + int c, ret, *valp = ctl->data; + __uint32_t vn_active; + + ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); + + if (!ret && write && *valp) { + xfs_notice(NULL, "Clearing xfsstats"); + for_each_possible_cpu(c) { + preempt_disable(); + /* save vn_active, it's a universal truth! */ + vn_active = per_cpu(xfsstats, c).vn_active; + memset(&per_cpu(xfsstats, c), 0, + sizeof(struct xfsstats)); + per_cpu(xfsstats, c).vn_active = vn_active; + preempt_enable(); + } + xfs_stats_clear = 0; + } + + return ret; +} + +STATIC int +xfs_panic_mask_proc_handler( + ctl_table *ctl, + int write, + void __user *buffer, + size_t *lenp, + loff_t *ppos) +{ + int ret, *valp = ctl->data; + + ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); + if (!ret && write) { + xfs_panic_mask = *valp; +#ifdef DEBUG + xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES); +#endif + } + return ret; +} +#endif /* CONFIG_PROC_FS */ + +static ctl_table xfs_table[] = { + { + .procname = "irix_sgid_inherit", + .data = &xfs_params.sgid_inherit.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.sgid_inherit.min, + .extra2 = &xfs_params.sgid_inherit.max + }, + { + .procname = "irix_symlink_mode", + .data = &xfs_params.symlink_mode.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.symlink_mode.min, + .extra2 = &xfs_params.symlink_mode.max + }, + { + .procname = "panic_mask", + .data = &xfs_params.panic_mask.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = xfs_panic_mask_proc_handler, + .extra1 = &xfs_params.panic_mask.min, + .extra2 = &xfs_params.panic_mask.max + }, + + { + .procname = "error_level", + .data = &xfs_params.error_level.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.error_level.min, + .extra2 = &xfs_params.error_level.max + }, + { + .procname = "xfssyncd_centisecs", + .data = &xfs_params.syncd_timer.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.syncd_timer.min, + .extra2 = &xfs_params.syncd_timer.max + }, + { + .procname = "inherit_sync", + .data = &xfs_params.inherit_sync.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.inherit_sync.min, + .extra2 = &xfs_params.inherit_sync.max + }, + { + .procname = "inherit_nodump", + .data = &xfs_params.inherit_nodump.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.inherit_nodump.min, + .extra2 = &xfs_params.inherit_nodump.max + }, + { + .procname = "inherit_noatime", + .data = &xfs_params.inherit_noatim.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.inherit_noatim.min, + .extra2 = &xfs_params.inherit_noatim.max + }, + { + .procname = "xfsbufd_centisecs", + .data = &xfs_params.xfs_buf_timer.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.xfs_buf_timer.min, + .extra2 = &xfs_params.xfs_buf_timer.max + }, + { + .procname = "age_buffer_centisecs", + .data = &xfs_params.xfs_buf_age.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.xfs_buf_age.min, + .extra2 = &xfs_params.xfs_buf_age.max + }, + { + .procname = "inherit_nosymlinks", + .data = &xfs_params.inherit_nosym.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.inherit_nosym.min, + .extra2 = &xfs_params.inherit_nosym.max + }, + { + .procname = "rotorstep", + .data = &xfs_params.rotorstep.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.rotorstep.min, + .extra2 = &xfs_params.rotorstep.max + }, + { + .procname = "inherit_nodefrag", + .data = &xfs_params.inherit_nodfrg.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.inherit_nodfrg.min, + .extra2 = &xfs_params.inherit_nodfrg.max + }, + { + .procname = "filestream_centisecs", + .data = &xfs_params.fstrm_timer.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.fstrm_timer.min, + .extra2 = &xfs_params.fstrm_timer.max, + }, + /* please keep this the last entry */ +#ifdef CONFIG_PROC_FS + { + .procname = "stats_clear", + .data = &xfs_params.stats_clear.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = xfs_stats_clear_proc_handler, + .extra1 = &xfs_params.stats_clear.min, + .extra2 = &xfs_params.stats_clear.max + }, +#endif /* CONFIG_PROC_FS */ + + {} +}; + +static ctl_table xfs_dir_table[] = { + { + .procname = "xfs", + .mode = 0555, + .child = xfs_table + }, + {} +}; + +static ctl_table xfs_root_table[] = { + { + .procname = "fs", + .mode = 0555, + .child = xfs_dir_table + }, + {} +}; + +int +xfs_sysctl_register(void) +{ + xfs_table_header = register_sysctl_table(xfs_root_table); + if (!xfs_table_header) + return -ENOMEM; + return 0; +} + +void +xfs_sysctl_unregister(void) +{ + unregister_sysctl_table(xfs_table_header); +} diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h new file mode 100644 index 0000000..b9937d4 --- /dev/null +++ b/fs/xfs/xfs_sysctl.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2001-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_SYSCTL_H__ +#define __XFS_SYSCTL_H__ + +#include + +/* + * Tunable xfs parameters + */ + +typedef struct xfs_sysctl_val { + int min; + int val; + int max; +} xfs_sysctl_val_t; + +typedef struct xfs_param { + xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is + * not a member of parent dir GID. */ + xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */ + xfs_sysctl_val_t panic_mask; /* bitmask to cause panic on errors. */ + xfs_sysctl_val_t error_level; /* Degree of reporting for problems */ + xfs_sysctl_val_t syncd_timer; /* Interval between xfssyncd wakeups */ + xfs_sysctl_val_t stats_clear; /* Reset all XFS statistics to zero. */ + xfs_sysctl_val_t inherit_sync; /* Inherit the "sync" inode flag. */ + xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */ + xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */ + xfs_sysctl_val_t xfs_buf_timer; /* Interval between xfsbufd wakeups. */ + xfs_sysctl_val_t xfs_buf_age; /* Metadata buffer age before flush. */ + xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */ + xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */ + xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */ + xfs_sysctl_val_t fstrm_timer; /* Filestream dir-AG assoc'n timeout. */ +} xfs_param_t; + +/* + * xfs_error_level: + * + * How much error reporting will be done when internal problems are + * encountered. These problems normally return an EFSCORRUPTED to their + * caller, with no other information reported. + * + * 0 No error reports + * 1 Report EFSCORRUPTED errors that will cause a filesystem shutdown + * 5 Report all EFSCORRUPTED errors (all of the above errors, plus any + * additional errors that are known to not cause shutdowns) + * + * xfs_panic_mask bit 0x8 turns the error reports into panics + */ + +enum { + /* XFS_REFCACHE_SIZE = 1 */ + /* XFS_REFCACHE_PURGE = 2 */ + /* XFS_RESTRICT_CHOWN = 3 */ + XFS_SGID_INHERIT = 4, + XFS_SYMLINK_MODE = 5, + XFS_PANIC_MASK = 6, + XFS_ERRLEVEL = 7, + XFS_SYNCD_TIMER = 8, + /* XFS_PROBE_DMAPI = 9 */ + /* XFS_PROBE_IOOPS = 10 */ + /* XFS_PROBE_QUOTA = 11 */ + XFS_STATS_CLEAR = 12, + XFS_INHERIT_SYNC = 13, + XFS_INHERIT_NODUMP = 14, + XFS_INHERIT_NOATIME = 15, + XFS_BUF_TIMER = 16, + XFS_BUF_AGE = 17, + /* XFS_IO_BYPASS = 18 */ + XFS_INHERIT_NOSYM = 19, + XFS_ROTORSTEP = 20, + XFS_INHERIT_NODFRG = 21, + XFS_FILESTREAM_TIMER = 22, +}; + +extern xfs_param_t xfs_params; + +#ifdef CONFIG_SYSCTL +extern int xfs_sysctl_register(void); +extern void xfs_sysctl_unregister(void); +#else +# define xfs_sysctl_register() (0) +# define xfs_sysctl_unregister() do { } while (0) +#endif /* CONFIG_SYSCTL */ + +#endif /* __XFS_SYSCTL_H__ */ diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c new file mode 100644 index 0000000..9010ce8 --- /dev/null +++ b/fs/xfs/xfs_trace.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2009, Christoph Hellwig + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_types.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_mount.h" +#include "xfs_ialloc.h" +#include "xfs_itable.h" +#include "xfs_alloc.h" +#include "xfs_bmap.h" +#include "xfs_attr.h" +#include "xfs_attr_leaf.h" +#include "xfs_log_priv.h" +#include "xfs_buf_item.h" +#include "xfs_quota.h" +#include "xfs_iomap.h" +#include "xfs_aops.h" +#include "xfs_dquot_item.h" +#include "xfs_dquot.h" +#include "xfs_log_recover.h" +#include "xfs_inode_item.h" + +/* + * We include this last to have the helpers above available for the trace + * event implementations. + */ +#define CREATE_TRACE_POINTS +#include "xfs_trace.h" diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h new file mode 100644 index 0000000..690fc7a --- /dev/null +++ b/fs/xfs/xfs_trace.h @@ -0,0 +1,1746 @@ +/* + * Copyright (c) 2009, Christoph Hellwig + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM xfs + +#if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_XFS_H + +#include + +struct xfs_agf; +struct xfs_alloc_arg; +struct xfs_attr_list_context; +struct xfs_buf_log_item; +struct xfs_da_args; +struct xfs_da_node_entry; +struct xfs_dquot; +struct xlog_ticket; +struct log; +struct xlog_recover; +struct xlog_recover_item; +struct xfs_buf_log_format; +struct xfs_inode_log_format; + +DECLARE_EVENT_CLASS(xfs_attr_list_class, + TP_PROTO(struct xfs_attr_list_context *ctx), + TP_ARGS(ctx), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(u32, hashval) + __field(u32, blkno) + __field(u32, offset) + __field(void *, alist) + __field(int, bufsize) + __field(int, count) + __field(int, firstu) + __field(int, dupcnt) + __field(int, flags) + ), + TP_fast_assign( + __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; + __entry->ino = ctx->dp->i_ino; + __entry->hashval = ctx->cursor->hashval; + __entry->blkno = ctx->cursor->blkno; + __entry->offset = ctx->cursor->offset; + __entry->alist = ctx->alist; + __entry->bufsize = ctx->bufsize; + __entry->count = ctx->count; + __entry->firstu = ctx->firstu; + __entry->flags = ctx->flags; + ), + TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " + "alist 0x%p size %u count %u firstu %u flags %d %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->hashval, + __entry->blkno, + __entry->offset, + __entry->dupcnt, + __entry->alist, + __entry->bufsize, + __entry->count, + __entry->firstu, + __entry->flags, + __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS) + ) +) + +#define DEFINE_ATTR_LIST_EVENT(name) \ +DEFINE_EVENT(xfs_attr_list_class, name, \ + TP_PROTO(struct xfs_attr_list_context *ctx), \ + TP_ARGS(ctx)) +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf_end); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound); + +DECLARE_EVENT_CLASS(xfs_perag_class, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, + unsigned long caller_ip), + TP_ARGS(mp, agno, refcount, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(int, refcount) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->refcount = refcount; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d agno %u refcount %d caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->refcount, + (char *)__entry->caller_ip) +); + +#define DEFINE_PERAG_REF_EVENT(name) \ +DEFINE_EVENT(xfs_perag_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \ + unsigned long caller_ip), \ + TP_ARGS(mp, agno, refcount, caller_ip)) +DEFINE_PERAG_REF_EVENT(xfs_perag_get); +DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag); +DEFINE_PERAG_REF_EVENT(xfs_perag_put); +DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); +DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); + +TRACE_EVENT(xfs_attr_list_node_descend, + TP_PROTO(struct xfs_attr_list_context *ctx, + struct xfs_da_node_entry *btree), + TP_ARGS(ctx, btree), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(u32, hashval) + __field(u32, blkno) + __field(u32, offset) + __field(void *, alist) + __field(int, bufsize) + __field(int, count) + __field(int, firstu) + __field(int, dupcnt) + __field(int, flags) + __field(u32, bt_hashval) + __field(u32, bt_before) + ), + TP_fast_assign( + __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; + __entry->ino = ctx->dp->i_ino; + __entry->hashval = ctx->cursor->hashval; + __entry->blkno = ctx->cursor->blkno; + __entry->offset = ctx->cursor->offset; + __entry->alist = ctx->alist; + __entry->bufsize = ctx->bufsize; + __entry->count = ctx->count; + __entry->firstu = ctx->firstu; + __entry->flags = ctx->flags; + __entry->bt_hashval = be32_to_cpu(btree->hashval); + __entry->bt_before = be32_to_cpu(btree->before); + ), + TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " + "alist 0x%p size %u count %u firstu %u flags %d %s " + "node hashval %u, node before %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->hashval, + __entry->blkno, + __entry->offset, + __entry->dupcnt, + __entry->alist, + __entry->bufsize, + __entry->count, + __entry->firstu, + __entry->flags, + __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS), + __entry->bt_hashval, + __entry->bt_before) +); + +TRACE_EVENT(xfs_iext_insert, + TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, + struct xfs_bmbt_irec *r, int state, unsigned long caller_ip), + TP_ARGS(ip, idx, r, state, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_extnum_t, idx) + __field(xfs_fileoff_t, startoff) + __field(xfs_fsblock_t, startblock) + __field(xfs_filblks_t, blockcount) + __field(xfs_exntst_t, state) + __field(int, bmap_state) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->idx = idx; + __entry->startoff = r->br_startoff; + __entry->startblock = r->br_startblock; + __entry->blockcount = r->br_blockcount; + __entry->state = r->br_state; + __entry->bmap_state = state; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " + "offset %lld block %lld count %lld flag %d caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), + (long)__entry->idx, + __entry->startoff, + (__int64_t)__entry->startblock, + __entry->blockcount, + __entry->state, + (char *)__entry->caller_ip) +); + +DECLARE_EVENT_CLASS(xfs_bmap_class, + TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, + unsigned long caller_ip), + TP_ARGS(ip, idx, state, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_extnum_t, idx) + __field(xfs_fileoff_t, startoff) + __field(xfs_fsblock_t, startblock) + __field(xfs_filblks_t, blockcount) + __field(xfs_exntst_t, state) + __field(int, bmap_state) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ? + ip->i_afp : &ip->i_df; + struct xfs_bmbt_irec r; + + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r); + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->idx = idx; + __entry->startoff = r.br_startoff; + __entry->startblock = r.br_startblock; + __entry->blockcount = r.br_blockcount; + __entry->state = r.br_state; + __entry->bmap_state = state; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " + "offset %lld block %lld count %lld flag %d caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), + (long)__entry->idx, + __entry->startoff, + (__int64_t)__entry->startblock, + __entry->blockcount, + __entry->state, + (char *)__entry->caller_ip) +) + +#define DEFINE_BMAP_EVENT(name) \ +DEFINE_EVENT(xfs_bmap_class, name, \ + TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \ + unsigned long caller_ip), \ + TP_ARGS(ip, idx, state, caller_ip)) +DEFINE_BMAP_EVENT(xfs_iext_remove); +DEFINE_BMAP_EVENT(xfs_bmap_pre_update); +DEFINE_BMAP_EVENT(xfs_bmap_post_update); +DEFINE_BMAP_EVENT(xfs_extlist); + +DECLARE_EVENT_CLASS(xfs_buf_class, + TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), + TP_ARGS(bp, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_daddr_t, bno) + __field(size_t, buffer_length) + __field(int, hold) + __field(int, pincount) + __field(unsigned, lockval) + __field(unsigned, flags) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = bp->b_target->bt_dev; + __entry->bno = bp->b_bn; + __entry->buffer_length = bp->b_buffer_length; + __entry->hold = atomic_read(&bp->b_hold); + __entry->pincount = atomic_read(&bp->b_pin_count); + __entry->lockval = bp->b_sema.count; + __entry->flags = bp->b_flags; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " + "lock %d flags %s caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->bno, + __entry->buffer_length, + __entry->hold, + __entry->pincount, + __entry->lockval, + __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), + (void *)__entry->caller_ip) +) + +#define DEFINE_BUF_EVENT(name) \ +DEFINE_EVENT(xfs_buf_class, name, \ + TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \ + TP_ARGS(bp, caller_ip)) +DEFINE_BUF_EVENT(xfs_buf_init); +DEFINE_BUF_EVENT(xfs_buf_free); +DEFINE_BUF_EVENT(xfs_buf_hold); +DEFINE_BUF_EVENT(xfs_buf_rele); +DEFINE_BUF_EVENT(xfs_buf_iodone); +DEFINE_BUF_EVENT(xfs_buf_iorequest); +DEFINE_BUF_EVENT(xfs_buf_bawrite); +DEFINE_BUF_EVENT(xfs_buf_bdwrite); +DEFINE_BUF_EVENT(xfs_buf_lock); +DEFINE_BUF_EVENT(xfs_buf_lock_done); +DEFINE_BUF_EVENT(xfs_buf_trylock); +DEFINE_BUF_EVENT(xfs_buf_unlock); +DEFINE_BUF_EVENT(xfs_buf_iowait); +DEFINE_BUF_EVENT(xfs_buf_iowait_done); +DEFINE_BUF_EVENT(xfs_buf_delwri_queue); +DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue); +DEFINE_BUF_EVENT(xfs_buf_delwri_split); +DEFINE_BUF_EVENT(xfs_buf_get_uncached); +DEFINE_BUF_EVENT(xfs_bdstrat_shut); +DEFINE_BUF_EVENT(xfs_buf_item_relse); +DEFINE_BUF_EVENT(xfs_buf_item_iodone); +DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); +DEFINE_BUF_EVENT(xfs_buf_error_relse); +DEFINE_BUF_EVENT(xfs_trans_read_buf_io); +DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); + +/* not really buffer traces, but the buf provides useful information */ +DEFINE_BUF_EVENT(xfs_btree_corrupt); +DEFINE_BUF_EVENT(xfs_da_btree_corrupt); +DEFINE_BUF_EVENT(xfs_reset_dqcounts); +DEFINE_BUF_EVENT(xfs_inode_item_push); + +/* pass flags explicitly */ +DECLARE_EVENT_CLASS(xfs_buf_flags_class, + TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), + TP_ARGS(bp, flags, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_daddr_t, bno) + __field(size_t, buffer_length) + __field(int, hold) + __field(int, pincount) + __field(unsigned, lockval) + __field(unsigned, flags) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = bp->b_target->bt_dev; + __entry->bno = bp->b_bn; + __entry->buffer_length = bp->b_buffer_length; + __entry->flags = flags; + __entry->hold = atomic_read(&bp->b_hold); + __entry->pincount = atomic_read(&bp->b_pin_count); + __entry->lockval = bp->b_sema.count; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " + "lock %d flags %s caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->bno, + __entry->buffer_length, + __entry->hold, + __entry->pincount, + __entry->lockval, + __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), + (void *)__entry->caller_ip) +) + +#define DEFINE_BUF_FLAGS_EVENT(name) \ +DEFINE_EVENT(xfs_buf_flags_class, name, \ + TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \ + TP_ARGS(bp, flags, caller_ip)) +DEFINE_BUF_FLAGS_EVENT(xfs_buf_find); +DEFINE_BUF_FLAGS_EVENT(xfs_buf_get); +DEFINE_BUF_FLAGS_EVENT(xfs_buf_read); + +TRACE_EVENT(xfs_buf_ioerror, + TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip), + TP_ARGS(bp, error, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_daddr_t, bno) + __field(size_t, buffer_length) + __field(unsigned, flags) + __field(int, hold) + __field(int, pincount) + __field(unsigned, lockval) + __field(int, error) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = bp->b_target->bt_dev; + __entry->bno = bp->b_bn; + __entry->buffer_length = bp->b_buffer_length; + __entry->hold = atomic_read(&bp->b_hold); + __entry->pincount = atomic_read(&bp->b_pin_count); + __entry->lockval = bp->b_sema.count; + __entry->error = error; + __entry->flags = bp->b_flags; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " + "lock %d error %d flags %s caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->bno, + __entry->buffer_length, + __entry->hold, + __entry->pincount, + __entry->lockval, + __entry->error, + __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), + (void *)__entry->caller_ip) +); + +DECLARE_EVENT_CLASS(xfs_buf_item_class, + TP_PROTO(struct xfs_buf_log_item *bip), + TP_ARGS(bip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_daddr_t, buf_bno) + __field(size_t, buf_len) + __field(int, buf_hold) + __field(int, buf_pincount) + __field(int, buf_lockval) + __field(unsigned, buf_flags) + __field(unsigned, bli_recur) + __field(int, bli_refcount) + __field(unsigned, bli_flags) + __field(void *, li_desc) + __field(unsigned, li_flags) + ), + TP_fast_assign( + __entry->dev = bip->bli_buf->b_target->bt_dev; + __entry->bli_flags = bip->bli_flags; + __entry->bli_recur = bip->bli_recur; + __entry->bli_refcount = atomic_read(&bip->bli_refcount); + __entry->buf_bno = bip->bli_buf->b_bn; + __entry->buf_len = bip->bli_buf->b_buffer_length; + __entry->buf_flags = bip->bli_buf->b_flags; + __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); + __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count); + __entry->buf_lockval = bip->bli_buf->b_sema.count; + __entry->li_desc = bip->bli_item.li_desc; + __entry->li_flags = bip->bli_item.li_flags; + ), + TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " + "lock %d flags %s recur %d refcount %d bliflags %s " + "lidesc 0x%p liflags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->buf_bno, + __entry->buf_len, + __entry->buf_hold, + __entry->buf_pincount, + __entry->buf_lockval, + __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS), + __entry->bli_recur, + __entry->bli_refcount, + __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS), + __entry->li_desc, + __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS)) +) + +#define DEFINE_BUF_ITEM_EVENT(name) \ +DEFINE_EVENT(xfs_buf_item_class, name, \ + TP_PROTO(struct xfs_buf_log_item *bip), \ + TP_ARGS(bip)) +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf); +DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); +DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur); +DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb); +DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur); +DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf); +DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur); +DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf); +DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse); +DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin); +DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold); +DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); +DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); + +DECLARE_EVENT_CLASS(xfs_lock_class, + TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, + unsigned long caller_ip), + TP_ARGS(ip, lock_flags, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, lock_flags) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->lock_flags = lock_flags; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), + (void *)__entry->caller_ip) +) + +#define DEFINE_LOCK_EVENT(name) \ +DEFINE_EVENT(xfs_lock_class, name, \ + TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \ + unsigned long caller_ip), \ + TP_ARGS(ip, lock_flags, caller_ip)) +DEFINE_LOCK_EVENT(xfs_ilock); +DEFINE_LOCK_EVENT(xfs_ilock_nowait); +DEFINE_LOCK_EVENT(xfs_ilock_demote); +DEFINE_LOCK_EVENT(xfs_iunlock); + +DECLARE_EVENT_CLASS(xfs_inode_class, + TP_PROTO(struct xfs_inode *ip), + TP_ARGS(ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + ), + TP_printk("dev %d:%d ino 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino) +) + +#define DEFINE_INODE_EVENT(name) \ +DEFINE_EVENT(xfs_inode_class, name, \ + TP_PROTO(struct xfs_inode *ip), \ + TP_ARGS(ip)) +DEFINE_INODE_EVENT(xfs_iget_skip); +DEFINE_INODE_EVENT(xfs_iget_reclaim); +DEFINE_INODE_EVENT(xfs_iget_reclaim_fail); +DEFINE_INODE_EVENT(xfs_iget_hit); +DEFINE_INODE_EVENT(xfs_iget_miss); + +DEFINE_INODE_EVENT(xfs_getattr); +DEFINE_INODE_EVENT(xfs_setattr); +DEFINE_INODE_EVENT(xfs_readlink); +DEFINE_INODE_EVENT(xfs_alloc_file_space); +DEFINE_INODE_EVENT(xfs_free_file_space); +DEFINE_INODE_EVENT(xfs_readdir); +#ifdef CONFIG_XFS_POSIX_ACL +DEFINE_INODE_EVENT(xfs_get_acl); +#endif +DEFINE_INODE_EVENT(xfs_vm_bmap); +DEFINE_INODE_EVENT(xfs_file_ioctl); +DEFINE_INODE_EVENT(xfs_file_compat_ioctl); +DEFINE_INODE_EVENT(xfs_ioctl_setattr); +DEFINE_INODE_EVENT(xfs_file_fsync); +DEFINE_INODE_EVENT(xfs_destroy_inode); +DEFINE_INODE_EVENT(xfs_write_inode); +DEFINE_INODE_EVENT(xfs_evict_inode); + +DEFINE_INODE_EVENT(xfs_dquot_dqalloc); +DEFINE_INODE_EVENT(xfs_dquot_dqdetach); + +DECLARE_EVENT_CLASS(xfs_iref_class, + TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), + TP_ARGS(ip, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, count) + __field(int, pincount) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->count = atomic_read(&VFS_I(ip)->i_count); + __entry->pincount = atomic_read(&ip->i_pincount); + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->count, + __entry->pincount, + (char *)__entry->caller_ip) +) + +#define DEFINE_IREF_EVENT(name) \ +DEFINE_EVENT(xfs_iref_class, name, \ + TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ + TP_ARGS(ip, caller_ip)) +DEFINE_IREF_EVENT(xfs_ihold); +DEFINE_IREF_EVENT(xfs_irele); +DEFINE_IREF_EVENT(xfs_inode_pin); +DEFINE_IREF_EVENT(xfs_inode_unpin); +DEFINE_IREF_EVENT(xfs_inode_unpin_nowait); + +DECLARE_EVENT_CLASS(xfs_namespace_class, + TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), + TP_ARGS(dp, name), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, dp_ino) + __dynamic_array(char, name, name->len) + ), + TP_fast_assign( + __entry->dev = VFS_I(dp)->i_sb->s_dev; + __entry->dp_ino = dp->i_ino; + memcpy(__get_str(name), name->name, name->len); + ), + TP_printk("dev %d:%d dp ino 0x%llx name %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->dp_ino, + __get_str(name)) +) + +#define DEFINE_NAMESPACE_EVENT(name) \ +DEFINE_EVENT(xfs_namespace_class, name, \ + TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \ + TP_ARGS(dp, name)) +DEFINE_NAMESPACE_EVENT(xfs_remove); +DEFINE_NAMESPACE_EVENT(xfs_link); +DEFINE_NAMESPACE_EVENT(xfs_lookup); +DEFINE_NAMESPACE_EVENT(xfs_create); +DEFINE_NAMESPACE_EVENT(xfs_symlink); + +TRACE_EVENT(xfs_rename, + TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp, + struct xfs_name *src_name, struct xfs_name *target_name), + TP_ARGS(src_dp, target_dp, src_name, target_name), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, src_dp_ino) + __field(xfs_ino_t, target_dp_ino) + __dynamic_array(char, src_name, src_name->len) + __dynamic_array(char, target_name, target_name->len) + ), + TP_fast_assign( + __entry->dev = VFS_I(src_dp)->i_sb->s_dev; + __entry->src_dp_ino = src_dp->i_ino; + __entry->target_dp_ino = target_dp->i_ino; + memcpy(__get_str(src_name), src_name->name, src_name->len); + memcpy(__get_str(target_name), target_name->name, target_name->len); + ), + TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx" + " src name %s target name %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->src_dp_ino, + __entry->target_dp_ino, + __get_str(src_name), + __get_str(target_name)) +) + +DECLARE_EVENT_CLASS(xfs_dquot_class, + TP_PROTO(struct xfs_dquot *dqp), + TP_ARGS(dqp), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, id) + __field(unsigned, flags) + __field(unsigned, nrefs) + __field(unsigned long long, res_bcount) + __field(unsigned long long, bcount) + __field(unsigned long long, icount) + __field(unsigned long long, blk_hardlimit) + __field(unsigned long long, blk_softlimit) + __field(unsigned long long, ino_hardlimit) + __field(unsigned long long, ino_softlimit) + ), \ + TP_fast_assign( + __entry->dev = dqp->q_mount->m_super->s_dev; + __entry->id = be32_to_cpu(dqp->q_core.d_id); + __entry->flags = dqp->dq_flags; + __entry->nrefs = dqp->q_nrefs; + __entry->res_bcount = dqp->q_res_bcount; + __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount); + __entry->icount = be64_to_cpu(dqp->q_core.d_icount); + __entry->blk_hardlimit = + be64_to_cpu(dqp->q_core.d_blk_hardlimit); + __entry->blk_softlimit = + be64_to_cpu(dqp->q_core.d_blk_softlimit); + __entry->ino_hardlimit = + be64_to_cpu(dqp->q_core.d_ino_hardlimit); + __entry->ino_softlimit = + be64_to_cpu(dqp->q_core.d_ino_softlimit); + ), + TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx " + "bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx " + "icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->id, + __print_flags(__entry->flags, "|", XFS_DQ_FLAGS), + __entry->nrefs, + __entry->res_bcount, + __entry->bcount, + __entry->blk_hardlimit, + __entry->blk_softlimit, + __entry->icount, + __entry->ino_hardlimit, + __entry->ino_softlimit) +) + +#define DEFINE_DQUOT_EVENT(name) \ +DEFINE_EVENT(xfs_dquot_class, name, \ + TP_PROTO(struct xfs_dquot *dqp), \ + TP_ARGS(dqp)) +DEFINE_DQUOT_EVENT(xfs_dqadjust); +DEFINE_DQUOT_EVENT(xfs_dqreclaim_want); +DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty); +DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink); +DEFINE_DQUOT_EVENT(xfs_dqattach_found); +DEFINE_DQUOT_EVENT(xfs_dqattach_get); +DEFINE_DQUOT_EVENT(xfs_dqinit); +DEFINE_DQUOT_EVENT(xfs_dqreuse); +DEFINE_DQUOT_EVENT(xfs_dqalloc); +DEFINE_DQUOT_EVENT(xfs_dqtobp_read); +DEFINE_DQUOT_EVENT(xfs_dqread); +DEFINE_DQUOT_EVENT(xfs_dqread_fail); +DEFINE_DQUOT_EVENT(xfs_dqlookup_found); +DEFINE_DQUOT_EVENT(xfs_dqlookup_want); +DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist); +DEFINE_DQUOT_EVENT(xfs_dqlookup_done); +DEFINE_DQUOT_EVENT(xfs_dqget_hit); +DEFINE_DQUOT_EVENT(xfs_dqget_miss); +DEFINE_DQUOT_EVENT(xfs_dqput); +DEFINE_DQUOT_EVENT(xfs_dqput_wait); +DEFINE_DQUOT_EVENT(xfs_dqput_free); +DEFINE_DQUOT_EVENT(xfs_dqrele); +DEFINE_DQUOT_EVENT(xfs_dqflush); +DEFINE_DQUOT_EVENT(xfs_dqflush_force); +DEFINE_DQUOT_EVENT(xfs_dqflush_done); + +DECLARE_EVENT_CLASS(xfs_loggrant_class, + TP_PROTO(struct log *log, struct xlog_ticket *tic), + TP_ARGS(log, tic), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned, trans_type) + __field(char, ocnt) + __field(char, cnt) + __field(int, curr_res) + __field(int, unit_res) + __field(unsigned int, flags) + __field(int, reserveq) + __field(int, writeq) + __field(int, grant_reserve_cycle) + __field(int, grant_reserve_bytes) + __field(int, grant_write_cycle) + __field(int, grant_write_bytes) + __field(int, curr_cycle) + __field(int, curr_block) + __field(xfs_lsn_t, tail_lsn) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->trans_type = tic->t_trans_type; + __entry->ocnt = tic->t_ocnt; + __entry->cnt = tic->t_cnt; + __entry->curr_res = tic->t_curr_res; + __entry->unit_res = tic->t_unit_res; + __entry->flags = tic->t_flags; + __entry->reserveq = list_empty(&log->l_reserveq); + __entry->writeq = list_empty(&log->l_writeq); + xlog_crack_grant_head(&log->l_grant_reserve_head, + &__entry->grant_reserve_cycle, + &__entry->grant_reserve_bytes); + xlog_crack_grant_head(&log->l_grant_write_head, + &__entry->grant_write_cycle, + &__entry->grant_write_bytes); + __entry->curr_cycle = log->l_curr_cycle; + __entry->curr_block = log->l_curr_block; + __entry->tail_lsn = atomic64_read(&log->l_tail_lsn); + ), + TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " + "t_unit_res %u t_flags %s reserveq %s " + "writeq %s grant_reserve_cycle %d " + "grant_reserve_bytes %d grant_write_cycle %d " + "grant_write_bytes %d curr_cycle %d curr_block %d " + "tail_cycle %d tail_block %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES), + __entry->ocnt, + __entry->cnt, + __entry->curr_res, + __entry->unit_res, + __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), + __entry->reserveq ? "empty" : "active", + __entry->writeq ? "empty" : "active", + __entry->grant_reserve_cycle, + __entry->grant_reserve_bytes, + __entry->grant_write_cycle, + __entry->grant_write_bytes, + __entry->curr_cycle, + __entry->curr_block, + CYCLE_LSN(__entry->tail_lsn), + BLOCK_LSN(__entry->tail_lsn) + ) +) + +#define DEFINE_LOGGRANT_EVENT(name) \ +DEFINE_EVENT(xfs_loggrant_class, name, \ + TP_PROTO(struct log *log, struct xlog_ticket *tic), \ + TP_ARGS(log, tic)) +DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm); +DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); +DEFINE_LOGGRANT_EVENT(xfs_log_reserve); +DEFINE_LOGGRANT_EVENT(xfs_log_umount_write); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_error); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); +DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter); +DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit); +DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub); + +DECLARE_EVENT_CLASS(xfs_file_class, + TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), + TP_ARGS(ip, count, offset, flags), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fsize_t, size) + __field(xfs_fsize_t, new_size) + __field(loff_t, offset) + __field(size_t, count) + __field(int, flags) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->new_size = ip->i_new_size; + __entry->offset = offset; + __entry->count = count; + __entry->flags = flags; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " + "offset 0x%llx count 0x%zx ioflags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->new_size, + __entry->offset, + __entry->count, + __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) +) + +#define DEFINE_RW_EVENT(name) \ +DEFINE_EVENT(xfs_file_class, name, \ + TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \ + TP_ARGS(ip, count, offset, flags)) +DEFINE_RW_EVENT(xfs_file_read); +DEFINE_RW_EVENT(xfs_file_buffered_write); +DEFINE_RW_EVENT(xfs_file_direct_write); +DEFINE_RW_EVENT(xfs_file_splice_read); +DEFINE_RW_EVENT(xfs_file_splice_write); + +DECLARE_EVENT_CLASS(xfs_page_class, + TP_PROTO(struct inode *inode, struct page *page, unsigned long off), + TP_ARGS(inode, page, off), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(pgoff_t, pgoff) + __field(loff_t, size) + __field(unsigned long, offset) + __field(int, delalloc) + __field(int, unwritten) + ), + TP_fast_assign( + int delalloc = -1, unwritten = -1; + + if (page_has_buffers(page)) + xfs_count_page_state(page, &delalloc, &unwritten); + __entry->dev = inode->i_sb->s_dev; + __entry->ino = XFS_I(inode)->i_ino; + __entry->pgoff = page_offset(page); + __entry->size = i_size_read(inode); + __entry->offset = off; + __entry->delalloc = delalloc; + __entry->unwritten = unwritten; + ), + TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " + "delalloc %d unwritten %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->pgoff, + __entry->size, + __entry->offset, + __entry->delalloc, + __entry->unwritten) +) + +#define DEFINE_PAGE_EVENT(name) \ +DEFINE_EVENT(xfs_page_class, name, \ + TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \ + TP_ARGS(inode, page, off)) +DEFINE_PAGE_EVENT(xfs_writepage); +DEFINE_PAGE_EVENT(xfs_releasepage); +DEFINE_PAGE_EVENT(xfs_invalidatepage); + +DECLARE_EVENT_CLASS(xfs_imap_class, + TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, + int type, struct xfs_bmbt_irec *irec), + TP_ARGS(ip, offset, count, type, irec), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(loff_t, size) + __field(loff_t, new_size) + __field(loff_t, offset) + __field(size_t, count) + __field(int, type) + __field(xfs_fileoff_t, startoff) + __field(xfs_fsblock_t, startblock) + __field(xfs_filblks_t, blockcount) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->new_size = ip->i_new_size; + __entry->offset = offset; + __entry->count = count; + __entry->type = type; + __entry->startoff = irec ? irec->br_startoff : 0; + __entry->startblock = irec ? irec->br_startblock : 0; + __entry->blockcount = irec ? irec->br_blockcount : 0; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " + "offset 0x%llx count %zd type %s " + "startoff 0x%llx startblock %lld blockcount 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->new_size, + __entry->offset, + __entry->count, + __print_symbolic(__entry->type, XFS_IO_TYPES), + __entry->startoff, + (__int64_t)__entry->startblock, + __entry->blockcount) +) + +#define DEFINE_IOMAP_EVENT(name) \ +DEFINE_EVENT(xfs_imap_class, name, \ + TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ + int type, struct xfs_bmbt_irec *irec), \ + TP_ARGS(ip, offset, count, type, irec)) +DEFINE_IOMAP_EVENT(xfs_map_blocks_found); +DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); +DEFINE_IOMAP_EVENT(xfs_get_blocks_found); +DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); + +DECLARE_EVENT_CLASS(xfs_simple_io_class, + TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), + TP_ARGS(ip, offset, count), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(loff_t, isize) + __field(loff_t, disize) + __field(loff_t, new_size) + __field(loff_t, offset) + __field(size_t, count) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->isize = ip->i_size; + __entry->disize = ip->i_d.di_size; + __entry->new_size = ip->i_new_size; + __entry->offset = offset; + __entry->count = count; + ), + TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx " + "offset 0x%llx count %zd", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->isize, + __entry->disize, + __entry->new_size, + __entry->offset, + __entry->count) +); + +#define DEFINE_SIMPLE_IO_EVENT(name) \ +DEFINE_EVENT(xfs_simple_io_class, name, \ + TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \ + TP_ARGS(ip, offset, count)) +DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); +DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); +DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); +DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); + +DECLARE_EVENT_CLASS(xfs_itrunc_class, + TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), + TP_ARGS(ip, new_size), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fsize_t, size) + __field(xfs_fsize_t, new_size) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->new_size = new_size; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->new_size) +) + +#define DEFINE_ITRUNC_EVENT(name) \ +DEFINE_EVENT(xfs_itrunc_class, name, \ + TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ + TP_ARGS(ip, new_size)) +DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start); +DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end); + +TRACE_EVENT(xfs_pagecache_inval, + TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish), + TP_ARGS(ip, start, finish), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fsize_t, size) + __field(xfs_off_t, start) + __field(xfs_off_t, finish) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->start = start; + __entry->finish = finish; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->start, + __entry->finish) +); + +TRACE_EVENT(xfs_bunmap, + TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, + int flags, unsigned long caller_ip), + TP_ARGS(ip, bno, len, flags, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fsize_t, size) + __field(xfs_fileoff_t, bno) + __field(xfs_filblks_t, len) + __field(unsigned long, caller_ip) + __field(int, flags) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->bno = bno; + __entry->len = len; + __entry->caller_ip = caller_ip; + __entry->flags = flags; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx" + "flags %s caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->bno, + __entry->len, + __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS), + (void *)__entry->caller_ip) + +); + +DECLARE_EVENT_CLASS(xfs_busy_class, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t agbno, xfs_extlen_t len), + TP_ARGS(mp, agno, agbno, len), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, len) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->agbno = agbno; + __entry->len = len; + ), + TP_printk("dev %d:%d agno %u agbno %u len %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __entry->len) +); +#define DEFINE_BUSY_EVENT(name) \ +DEFINE_EVENT(xfs_busy_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ + xfs_agblock_t agbno, xfs_extlen_t len), \ + TP_ARGS(mp, agno, agbno, len)) +DEFINE_BUSY_EVENT(xfs_alloc_busy); +DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem); +DEFINE_BUSY_EVENT(xfs_alloc_busy_force); +DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse); +DEFINE_BUSY_EVENT(xfs_alloc_busy_clear); + +TRACE_EVENT(xfs_alloc_busy_trim, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t agbno, xfs_extlen_t len, + xfs_agblock_t tbno, xfs_extlen_t tlen), + TP_ARGS(mp, agno, agbno, len, tbno, tlen), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, len) + __field(xfs_agblock_t, tbno) + __field(xfs_extlen_t, tlen) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->agbno = agbno; + __entry->len = len; + __entry->tbno = tbno; + __entry->tlen = tlen; + ), + TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __entry->len, + __entry->tbno, + __entry->tlen) +); + +TRACE_EVENT(xfs_trans_commit_lsn, + TP_PROTO(struct xfs_trans *trans), + TP_ARGS(trans), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(struct xfs_trans *, tp) + __field(xfs_lsn_t, lsn) + ), + TP_fast_assign( + __entry->dev = trans->t_mountp->m_super->s_dev; + __entry->tp = trans; + __entry->lsn = trans->t_commit_lsn; + ), + TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->tp, + __entry->lsn) +); + +TRACE_EVENT(xfs_agf, + TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags, + unsigned long caller_ip), + TP_ARGS(mp, agf, flags, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(int, flags) + __field(__u32, length) + __field(__u32, bno_root) + __field(__u32, cnt_root) + __field(__u32, bno_level) + __field(__u32, cnt_level) + __field(__u32, flfirst) + __field(__u32, fllast) + __field(__u32, flcount) + __field(__u32, freeblks) + __field(__u32, longest) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = be32_to_cpu(agf->agf_seqno), + __entry->flags = flags; + __entry->length = be32_to_cpu(agf->agf_length), + __entry->bno_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]), + __entry->cnt_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]), + __entry->bno_level = + be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]), + __entry->cnt_level = + be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]), + __entry->flfirst = be32_to_cpu(agf->agf_flfirst), + __entry->fllast = be32_to_cpu(agf->agf_fllast), + __entry->flcount = be32_to_cpu(agf->agf_flcount), + __entry->freeblks = be32_to_cpu(agf->agf_freeblks), + __entry->longest = be32_to_cpu(agf->agf_longest); + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u " + "levels b %u c %u flfirst %u fllast %u flcount %u " + "freeblks %u longest %u caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __print_flags(__entry->flags, "|", XFS_AGF_FLAGS), + __entry->length, + __entry->bno_root, + __entry->cnt_root, + __entry->bno_level, + __entry->cnt_level, + __entry->flfirst, + __entry->fllast, + __entry->flcount, + __entry->freeblks, + __entry->longest, + (void *)__entry->caller_ip) +); + +TRACE_EVENT(xfs_free_extent, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, + xfs_extlen_t len, bool isfl, int haveleft, int haveright), + TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, len) + __field(int, isfl) + __field(int, haveleft) + __field(int, haveright) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->agbno = agbno; + __entry->len = len; + __entry->isfl = isfl; + __entry->haveleft = haveleft; + __entry->haveright = haveright; + ), + TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __entry->len, + __entry->isfl, + __entry->haveleft ? + (__entry->haveright ? "both" : "left") : + (__entry->haveright ? "right" : "none")) + +); + +DECLARE_EVENT_CLASS(xfs_alloc_class, + TP_PROTO(struct xfs_alloc_arg *args), + TP_ARGS(args), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, minlen) + __field(xfs_extlen_t, maxlen) + __field(xfs_extlen_t, mod) + __field(xfs_extlen_t, prod) + __field(xfs_extlen_t, minleft) + __field(xfs_extlen_t, total) + __field(xfs_extlen_t, alignment) + __field(xfs_extlen_t, minalignslop) + __field(xfs_extlen_t, len) + __field(short, type) + __field(short, otype) + __field(char, wasdel) + __field(char, wasfromfl) + __field(char, isfl) + __field(char, userdata) + __field(xfs_fsblock_t, firstblock) + ), + TP_fast_assign( + __entry->dev = args->mp->m_super->s_dev; + __entry->agno = args->agno; + __entry->agbno = args->agbno; + __entry->minlen = args->minlen; + __entry->maxlen = args->maxlen; + __entry->mod = args->mod; + __entry->prod = args->prod; + __entry->minleft = args->minleft; + __entry->total = args->total; + __entry->alignment = args->alignment; + __entry->minalignslop = args->minalignslop; + __entry->len = args->len; + __entry->type = args->type; + __entry->otype = args->otype; + __entry->wasdel = args->wasdel; + __entry->wasfromfl = args->wasfromfl; + __entry->isfl = args->isfl; + __entry->userdata = args->userdata; + __entry->firstblock = args->firstblock; + ), + TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u " + "prod %u minleft %u total %u alignment %u minalignslop %u " + "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d " + "userdata %d firstblock 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __entry->minlen, + __entry->maxlen, + __entry->mod, + __entry->prod, + __entry->minleft, + __entry->total, + __entry->alignment, + __entry->minalignslop, + __entry->len, + __print_symbolic(__entry->type, XFS_ALLOC_TYPES), + __print_symbolic(__entry->otype, XFS_ALLOC_TYPES), + __entry->wasdel, + __entry->wasfromfl, + __entry->isfl, + __entry->userdata, + (unsigned long long)__entry->firstblock) +) + +#define DEFINE_ALLOC_EVENT(name) \ +DEFINE_EVENT(xfs_alloc_class, name, \ + TP_PROTO(struct xfs_alloc_arg *args), \ + TP_ARGS(args)) +DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); +DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound); +DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); +DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); +DEFINE_ALLOC_EVENT(xfs_alloc_near_first); +DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); +DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); +DEFINE_ALLOC_EVENT(xfs_alloc_near_error); +DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry); +DEFINE_ALLOC_EVENT(xfs_alloc_near_busy); +DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); +DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); +DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); +DEFINE_ALLOC_EVENT(xfs_alloc_size_done); +DEFINE_ALLOC_EVENT(xfs_alloc_size_error); +DEFINE_ALLOC_EVENT(xfs_alloc_size_busy); +DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); +DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); +DEFINE_ALLOC_EVENT(xfs_alloc_small_done); +DEFINE_ALLOC_EVENT(xfs_alloc_small_error); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed); + +DECLARE_EVENT_CLASS(xfs_dir2_class, + TP_PROTO(struct xfs_da_args *args), + TP_ARGS(args), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __dynamic_array(char, name, args->namelen) + __field(int, namelen) + __field(xfs_dahash_t, hashval) + __field(xfs_ino_t, inumber) + __field(int, op_flags) + ), + TP_fast_assign( + __entry->dev = VFS_I(args->dp)->i_sb->s_dev; + __entry->ino = args->dp->i_ino; + if (args->namelen) + memcpy(__get_str(name), args->name, args->namelen); + __entry->namelen = args->namelen; + __entry->hashval = args->hashval; + __entry->inumber = args->inumber; + __entry->op_flags = args->op_flags; + ), + TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x " + "inumber 0x%llx op_flags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->namelen, + __entry->namelen ? __get_str(name) : NULL, + __entry->namelen, + __entry->hashval, + __entry->inumber, + __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS)) +) + +#define DEFINE_DIR2_EVENT(name) \ +DEFINE_EVENT(xfs_dir2_class, name, \ + TP_PROTO(struct xfs_da_args *args), \ + TP_ARGS(args)) +DEFINE_DIR2_EVENT(xfs_dir2_sf_addname); +DEFINE_DIR2_EVENT(xfs_dir2_sf_create); +DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup); +DEFINE_DIR2_EVENT(xfs_dir2_sf_replace); +DEFINE_DIR2_EVENT(xfs_dir2_sf_removename); +DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4); +DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8); +DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block); +DEFINE_DIR2_EVENT(xfs_dir2_block_addname); +DEFINE_DIR2_EVENT(xfs_dir2_block_lookup); +DEFINE_DIR2_EVENT(xfs_dir2_block_replace); +DEFINE_DIR2_EVENT(xfs_dir2_block_removename); +DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf); +DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node); +DEFINE_DIR2_EVENT(xfs_dir2_node_addname); +DEFINE_DIR2_EVENT(xfs_dir2_node_lookup); +DEFINE_DIR2_EVENT(xfs_dir2_node_replace); +DEFINE_DIR2_EVENT(xfs_dir2_node_removename); +DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf); + +DECLARE_EVENT_CLASS(xfs_dir2_space_class, + TP_PROTO(struct xfs_da_args *args, int idx), + TP_ARGS(args, idx), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, op_flags) + __field(int, idx) + ), + TP_fast_assign( + __entry->dev = VFS_I(args->dp)->i_sb->s_dev; + __entry->ino = args->dp->i_ino; + __entry->op_flags = args->op_flags; + __entry->idx = idx; + ), + TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), + __entry->idx) +) + +#define DEFINE_DIR2_SPACE_EVENT(name) \ +DEFINE_EVENT(xfs_dir2_space_class, name, \ + TP_PROTO(struct xfs_da_args *args, int idx), \ + TP_ARGS(args, idx)) +DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add); +DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove); +DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode); +DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode); + +TRACE_EVENT(xfs_dir2_leafn_moveents, + TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count), + TP_ARGS(args, src_idx, dst_idx, count), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, op_flags) + __field(int, src_idx) + __field(int, dst_idx) + __field(int, count) + ), + TP_fast_assign( + __entry->dev = VFS_I(args->dp)->i_sb->s_dev; + __entry->ino = args->dp->i_ino; + __entry->op_flags = args->op_flags; + __entry->src_idx = src_idx; + __entry->dst_idx = dst_idx; + __entry->count = count; + ), + TP_printk("dev %d:%d ino 0x%llx op_flags %s " + "src_idx %d dst_idx %d count %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), + __entry->src_idx, + __entry->dst_idx, + __entry->count) +); + +#define XFS_SWAPEXT_INODES \ + { 0, "target" }, \ + { 1, "temp" } + +#define XFS_INODE_FORMAT_STR \ + { 0, "invalid" }, \ + { 1, "local" }, \ + { 2, "extent" }, \ + { 3, "btree" } + +DECLARE_EVENT_CLASS(xfs_swap_extent_class, + TP_PROTO(struct xfs_inode *ip, int which), + TP_ARGS(ip, which), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, which) + __field(xfs_ino_t, ino) + __field(int, format) + __field(int, nex) + __field(int, max_nex) + __field(int, broot_size) + __field(int, fork_off) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->which = which; + __entry->ino = ip->i_ino; + __entry->format = ip->i_d.di_format; + __entry->nex = ip->i_d.di_nextents; + __entry->max_nex = ip->i_df.if_ext_max; + __entry->broot_size = ip->i_df.if_broot_bytes; + __entry->fork_off = XFS_IFORK_BOFF(ip); + ), + TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, " + "Max in-fork extents %d, broot size %d, fork offset %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_symbolic(__entry->which, XFS_SWAPEXT_INODES), + __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR), + __entry->nex, + __entry->max_nex, + __entry->broot_size, + __entry->fork_off) +) + +#define DEFINE_SWAPEXT_EVENT(name) \ +DEFINE_EVENT(xfs_swap_extent_class, name, \ + TP_PROTO(struct xfs_inode *ip, int which), \ + TP_ARGS(ip, which)) + +DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before); +DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after); + +DECLARE_EVENT_CLASS(xfs_log_recover_item_class, + TP_PROTO(struct log *log, struct xlog_recover *trans, + struct xlog_recover_item *item, int pass), + TP_ARGS(log, trans, item, pass), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned long, item) + __field(xlog_tid_t, tid) + __field(int, type) + __field(int, pass) + __field(int, count) + __field(int, total) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->item = (unsigned long)item; + __entry->tid = trans->r_log_tid; + __entry->type = ITEM_TYPE(item); + __entry->pass = pass; + __entry->count = item->ri_cnt; + __entry->total = item->ri_total; + ), + TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s " + "item region count/total %d/%d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->tid, + __entry->pass, + (void *)__entry->item, + __print_symbolic(__entry->type, XFS_LI_TYPE_DESC), + __entry->count, + __entry->total) +) + +#define DEFINE_LOG_RECOVER_ITEM(name) \ +DEFINE_EVENT(xfs_log_recover_item_class, name, \ + TP_PROTO(struct log *log, struct xlog_recover *trans, \ + struct xlog_recover_item *item, int pass), \ + TP_ARGS(log, trans, item, pass)) + +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add); +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont); +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head); +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail); +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover); + +DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class, + TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), + TP_ARGS(log, buf_f), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(__int64_t, blkno) + __field(unsigned short, len) + __field(unsigned short, flags) + __field(unsigned short, size) + __field(unsigned int, map_size) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->blkno = buf_f->blf_blkno; + __entry->len = buf_f->blf_len; + __entry->flags = buf_f->blf_flags; + __entry->size = buf_f->blf_size; + __entry->map_size = buf_f->blf_map_size; + ), + TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, " + "map_size %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->blkno, + __entry->len, + __entry->flags, + __entry->size, + __entry->map_size) +) + +#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \ +DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \ + TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \ + TP_ARGS(log, buf_f)) + +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf); + +DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class, + TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), + TP_ARGS(log, in_f), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(unsigned short, size) + __field(int, fields) + __field(unsigned short, asize) + __field(unsigned short, dsize) + __field(__int64_t, blkno) + __field(int, len) + __field(int, boffset) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->ino = in_f->ilf_ino; + __entry->size = in_f->ilf_size; + __entry->fields = in_f->ilf_fields; + __entry->asize = in_f->ilf_asize; + __entry->dsize = in_f->ilf_dsize; + __entry->blkno = in_f->ilf_blkno; + __entry->len = in_f->ilf_len; + __entry->boffset = in_f->ilf_boffset; + ), + TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, " + "dsize %d, blkno 0x%llx, len %d, boffset %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->fields, + __entry->asize, + __entry->dsize, + __entry->blkno, + __entry->len, + __entry->boffset) +) +#define DEFINE_LOG_RECOVER_INO_ITEM(name) \ +DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \ + TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \ + TP_ARGS(log, in_f)) + +DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover); +DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); +DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); + +DECLARE_EVENT_CLASS(xfs_discard_class, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t agbno, xfs_extlen_t len), + TP_ARGS(mp, agno, agbno, len), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, len) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->agbno = agbno; + __entry->len = len; + ), + TP_printk("dev %d:%d agno %u agbno %u len %u\n", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __entry->len) +) + +#define DEFINE_DISCARD_EVENT(name) \ +DEFINE_EVENT(xfs_discard_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ + xfs_agblock_t agbno, xfs_extlen_t len), \ + TP_ARGS(mp, agno, agbno, len)) +DEFINE_DISCARD_EVENT(xfs_discard_extent); +DEFINE_DISCARD_EVENT(xfs_discard_toosmall); +DEFINE_DISCARD_EVENT(xfs_discard_exclude); +DEFINE_DISCARD_EVENT(xfs_discard_busy); + +#endif /* _TRACE_XFS_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE xfs_trace +#include diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c new file mode 100644 index 0000000..4d00ee6 --- /dev/null +++ b/fs/xfs/xfs_trans_dquot.c @@ -0,0 +1,890 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_itable.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_trans_priv.h" +#include "xfs_qm.h" + +STATIC void xfs_trans_alloc_dqinfo(xfs_trans_t *); + +/* + * Add the locked dquot to the transaction. + * The dquot must be locked, and it cannot be associated with any + * transaction. + */ +void +xfs_trans_dqjoin( + xfs_trans_t *tp, + xfs_dquot_t *dqp) +{ + ASSERT(dqp->q_transp != tp); + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + ASSERT(dqp->q_logitem.qli_dquot == dqp); + + /* + * Get a log_item_desc to point at the new item. + */ + xfs_trans_add_item(tp, &dqp->q_logitem.qli_item); + + /* + * Initialize d_transp so we can later determine if this dquot is + * associated with this transaction. + */ + dqp->q_transp = tp; +} + + +/* + * This is called to mark the dquot as needing + * to be logged when the transaction is committed. The dquot must + * already be associated with the given transaction. + * Note that it marks the entire transaction as dirty. In the ordinary + * case, this gets called via xfs_trans_commit, after the transaction + * is already dirty. However, there's nothing stop this from getting + * called directly, as done by xfs_qm_scall_setqlim. Hence, the TRANS_DIRTY + * flag. + */ +void +xfs_trans_log_dquot( + xfs_trans_t *tp, + xfs_dquot_t *dqp) +{ + ASSERT(dqp->q_transp == tp); + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + + tp->t_flags |= XFS_TRANS_DIRTY; + dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY; +} + +/* + * Carry forward whatever is left of the quota blk reservation to + * the spanky new transaction + */ +void +xfs_trans_dup_dqinfo( + xfs_trans_t *otp, + xfs_trans_t *ntp) +{ + xfs_dqtrx_t *oq, *nq; + int i,j; + xfs_dqtrx_t *oqa, *nqa; + + if (!otp->t_dqinfo) + return; + + xfs_trans_alloc_dqinfo(ntp); + oqa = otp->t_dqinfo->dqa_usrdquots; + nqa = ntp->t_dqinfo->dqa_usrdquots; + + /* + * Because the quota blk reservation is carried forward, + * it is also necessary to carry forward the DQ_DIRTY flag. + */ + if(otp->t_flags & XFS_TRANS_DQ_DIRTY) + ntp->t_flags |= XFS_TRANS_DQ_DIRTY; + + for (j = 0; j < 2; j++) { + for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { + if (oqa[i].qt_dquot == NULL) + break; + oq = &oqa[i]; + nq = &nqa[i]; + + nq->qt_dquot = oq->qt_dquot; + nq->qt_bcount_delta = nq->qt_icount_delta = 0; + nq->qt_rtbcount_delta = 0; + + /* + * Transfer whatever is left of the reservations. + */ + nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used; + oq->qt_blk_res = oq->qt_blk_res_used; + + nq->qt_rtblk_res = oq->qt_rtblk_res - + oq->qt_rtblk_res_used; + oq->qt_rtblk_res = oq->qt_rtblk_res_used; + + nq->qt_ino_res = oq->qt_ino_res - oq->qt_ino_res_used; + oq->qt_ino_res = oq->qt_ino_res_used; + + } + oqa = otp->t_dqinfo->dqa_grpdquots; + nqa = ntp->t_dqinfo->dqa_grpdquots; + } +} + +/* + * Wrap around mod_dquot to account for both user and group quotas. + */ +void +xfs_trans_mod_dquot_byino( + xfs_trans_t *tp, + xfs_inode_t *ip, + uint field, + long delta) +{ + xfs_mount_t *mp = tp->t_mountp; + + if (!XFS_IS_QUOTA_RUNNING(mp) || + !XFS_IS_QUOTA_ON(mp) || + ip->i_ino == mp->m_sb.sb_uquotino || + ip->i_ino == mp->m_sb.sb_gquotino) + return; + + if (tp->t_dqinfo == NULL) + xfs_trans_alloc_dqinfo(tp); + + if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) + (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta); + if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot) + (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta); +} + +STATIC xfs_dqtrx_t * +xfs_trans_get_dqtrx( + xfs_trans_t *tp, + xfs_dquot_t *dqp) +{ + int i; + xfs_dqtrx_t *qa; + + qa = XFS_QM_ISUDQ(dqp) ? + tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots; + + for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { + if (qa[i].qt_dquot == NULL || + qa[i].qt_dquot == dqp) + return &qa[i]; + } + + return NULL; +} + +/* + * Make the changes in the transaction structure. + * The moral equivalent to xfs_trans_mod_sb(). + * We don't touch any fields in the dquot, so we don't care + * if it's locked or not (most of the time it won't be). + */ +void +xfs_trans_mod_dquot( + xfs_trans_t *tp, + xfs_dquot_t *dqp, + uint field, + long delta) +{ + xfs_dqtrx_t *qtrx; + + ASSERT(tp); + ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); + qtrx = NULL; + + if (tp->t_dqinfo == NULL) + xfs_trans_alloc_dqinfo(tp); + /* + * Find either the first free slot or the slot that belongs + * to this dquot. + */ + qtrx = xfs_trans_get_dqtrx(tp, dqp); + ASSERT(qtrx); + if (qtrx->qt_dquot == NULL) + qtrx->qt_dquot = dqp; + + switch (field) { + + /* + * regular disk blk reservation + */ + case XFS_TRANS_DQ_RES_BLKS: + qtrx->qt_blk_res += (ulong)delta; + break; + + /* + * inode reservation + */ + case XFS_TRANS_DQ_RES_INOS: + qtrx->qt_ino_res += (ulong)delta; + break; + + /* + * disk blocks used. + */ + case XFS_TRANS_DQ_BCOUNT: + if (qtrx->qt_blk_res && delta > 0) { + qtrx->qt_blk_res_used += (ulong)delta; + ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used); + } + qtrx->qt_bcount_delta += delta; + break; + + case XFS_TRANS_DQ_DELBCOUNT: + qtrx->qt_delbcnt_delta += delta; + break; + + /* + * Inode Count + */ + case XFS_TRANS_DQ_ICOUNT: + if (qtrx->qt_ino_res && delta > 0) { + qtrx->qt_ino_res_used += (ulong)delta; + ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used); + } + qtrx->qt_icount_delta += delta; + break; + + /* + * rtblk reservation + */ + case XFS_TRANS_DQ_RES_RTBLKS: + qtrx->qt_rtblk_res += (ulong)delta; + break; + + /* + * rtblk count + */ + case XFS_TRANS_DQ_RTBCOUNT: + if (qtrx->qt_rtblk_res && delta > 0) { + qtrx->qt_rtblk_res_used += (ulong)delta; + ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used); + } + qtrx->qt_rtbcount_delta += delta; + break; + + case XFS_TRANS_DQ_DELRTBCOUNT: + qtrx->qt_delrtb_delta += delta; + break; + + default: + ASSERT(0); + } + tp->t_flags |= XFS_TRANS_DQ_DIRTY; +} + + +/* + * Given an array of dqtrx structures, lock all the dquots associated + * and join them to the transaction, provided they have been modified. + * We know that the highest number of dquots (of one type - usr OR grp), + * involved in a transaction is 2 and that both usr and grp combined - 3. + * So, we don't attempt to make this very generic. + */ +STATIC void +xfs_trans_dqlockedjoin( + xfs_trans_t *tp, + xfs_dqtrx_t *q) +{ + ASSERT(q[0].qt_dquot != NULL); + if (q[1].qt_dquot == NULL) { + xfs_dqlock(q[0].qt_dquot); + xfs_trans_dqjoin(tp, q[0].qt_dquot); + } else { + ASSERT(XFS_QM_TRANS_MAXDQS == 2); + xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot); + xfs_trans_dqjoin(tp, q[0].qt_dquot); + xfs_trans_dqjoin(tp, q[1].qt_dquot); + } +} + + +/* + * Called by xfs_trans_commit() and similar in spirit to + * xfs_trans_apply_sb_deltas(). + * Go thru all the dquots belonging to this transaction and modify the + * INCORE dquot to reflect the actual usages. + * Unreserve just the reservations done by this transaction. + * dquot is still left locked at exit. + */ +void +xfs_trans_apply_dquot_deltas( + xfs_trans_t *tp) +{ + int i, j; + xfs_dquot_t *dqp; + xfs_dqtrx_t *qtrx, *qa; + xfs_disk_dquot_t *d; + long totalbdelta; + long totalrtbdelta; + + if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY)) + return; + + ASSERT(tp->t_dqinfo); + qa = tp->t_dqinfo->dqa_usrdquots; + for (j = 0; j < 2; j++) { + if (qa[0].qt_dquot == NULL) { + qa = tp->t_dqinfo->dqa_grpdquots; + continue; + } + + /* + * Lock all of the dquots and join them to the transaction. + */ + xfs_trans_dqlockedjoin(tp, qa); + + for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { + qtrx = &qa[i]; + /* + * The array of dquots is filled + * sequentially, not sparsely. + */ + if ((dqp = qtrx->qt_dquot) == NULL) + break; + + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + ASSERT(dqp->q_transp == tp); + + /* + * adjust the actual number of blocks used + */ + d = &dqp->q_core; + + /* + * The issue here is - sometimes we don't make a blkquota + * reservation intentionally to be fair to users + * (when the amount is small). On the other hand, + * delayed allocs do make reservations, but that's + * outside of a transaction, so we have no + * idea how much was really reserved. + * So, here we've accumulated delayed allocation blks and + * non-delay blks. The assumption is that the + * delayed ones are always reserved (outside of a + * transaction), and the others may or may not have + * quota reservations. + */ + totalbdelta = qtrx->qt_bcount_delta + + qtrx->qt_delbcnt_delta; + totalrtbdelta = qtrx->qt_rtbcount_delta + + qtrx->qt_delrtb_delta; +#ifdef DEBUG + if (totalbdelta < 0) + ASSERT(be64_to_cpu(d->d_bcount) >= + -totalbdelta); + + if (totalrtbdelta < 0) + ASSERT(be64_to_cpu(d->d_rtbcount) >= + -totalrtbdelta); + + if (qtrx->qt_icount_delta < 0) + ASSERT(be64_to_cpu(d->d_icount) >= + -qtrx->qt_icount_delta); +#endif + if (totalbdelta) + be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta); + + if (qtrx->qt_icount_delta) + be64_add_cpu(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta); + + if (totalrtbdelta) + be64_add_cpu(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta); + + /* + * Get any default limits in use. + * Start/reset the timer(s) if needed. + */ + if (d->d_id) { + xfs_qm_adjust_dqlimits(tp->t_mountp, d); + xfs_qm_adjust_dqtimers(tp->t_mountp, d); + } + + dqp->dq_flags |= XFS_DQ_DIRTY; + /* + * add this to the list of items to get logged + */ + xfs_trans_log_dquot(tp, dqp); + /* + * Take off what's left of the original reservation. + * In case of delayed allocations, there's no + * reservation that a transaction structure knows of. + */ + if (qtrx->qt_blk_res != 0) { + if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) { + if (qtrx->qt_blk_res > + qtrx->qt_blk_res_used) + dqp->q_res_bcount -= (xfs_qcnt_t) + (qtrx->qt_blk_res - + qtrx->qt_blk_res_used); + else + dqp->q_res_bcount -= (xfs_qcnt_t) + (qtrx->qt_blk_res_used - + qtrx->qt_blk_res); + } + } else { + /* + * These blks were never reserved, either inside + * a transaction or outside one (in a delayed + * allocation). Also, this isn't always a + * negative number since we sometimes + * deliberately skip quota reservations. + */ + if (qtrx->qt_bcount_delta) { + dqp->q_res_bcount += + (xfs_qcnt_t)qtrx->qt_bcount_delta; + } + } + /* + * Adjust the RT reservation. + */ + if (qtrx->qt_rtblk_res != 0) { + if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) { + if (qtrx->qt_rtblk_res > + qtrx->qt_rtblk_res_used) + dqp->q_res_rtbcount -= (xfs_qcnt_t) + (qtrx->qt_rtblk_res - + qtrx->qt_rtblk_res_used); + else + dqp->q_res_rtbcount -= (xfs_qcnt_t) + (qtrx->qt_rtblk_res_used - + qtrx->qt_rtblk_res); + } + } else { + if (qtrx->qt_rtbcount_delta) + dqp->q_res_rtbcount += + (xfs_qcnt_t)qtrx->qt_rtbcount_delta; + } + + /* + * Adjust the inode reservation. + */ + if (qtrx->qt_ino_res != 0) { + ASSERT(qtrx->qt_ino_res >= + qtrx->qt_ino_res_used); + if (qtrx->qt_ino_res > qtrx->qt_ino_res_used) + dqp->q_res_icount -= (xfs_qcnt_t) + (qtrx->qt_ino_res - + qtrx->qt_ino_res_used); + } else { + if (qtrx->qt_icount_delta) + dqp->q_res_icount += + (xfs_qcnt_t)qtrx->qt_icount_delta; + } + + ASSERT(dqp->q_res_bcount >= + be64_to_cpu(dqp->q_core.d_bcount)); + ASSERT(dqp->q_res_icount >= + be64_to_cpu(dqp->q_core.d_icount)); + ASSERT(dqp->q_res_rtbcount >= + be64_to_cpu(dqp->q_core.d_rtbcount)); + } + /* + * Do the group quotas next + */ + qa = tp->t_dqinfo->dqa_grpdquots; + } +} + +/* + * Release the reservations, and adjust the dquots accordingly. + * This is called only when the transaction is being aborted. If by + * any chance we have done dquot modifications incore (ie. deltas) already, + * we simply throw those away, since that's the expected behavior + * when a transaction is curtailed without a commit. + */ +void +xfs_trans_unreserve_and_mod_dquots( + xfs_trans_t *tp) +{ + int i, j; + xfs_dquot_t *dqp; + xfs_dqtrx_t *qtrx, *qa; + boolean_t locked; + + if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) + return; + + qa = tp->t_dqinfo->dqa_usrdquots; + + for (j = 0; j < 2; j++) { + for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { + qtrx = &qa[i]; + /* + * We assume that the array of dquots is filled + * sequentially, not sparsely. + */ + if ((dqp = qtrx->qt_dquot) == NULL) + break; + /* + * Unreserve the original reservation. We don't care + * about the number of blocks used field, or deltas. + * Also we don't bother to zero the fields. + */ + locked = B_FALSE; + if (qtrx->qt_blk_res) { + xfs_dqlock(dqp); + locked = B_TRUE; + dqp->q_res_bcount -= + (xfs_qcnt_t)qtrx->qt_blk_res; + } + if (qtrx->qt_ino_res) { + if (!locked) { + xfs_dqlock(dqp); + locked = B_TRUE; + } + dqp->q_res_icount -= + (xfs_qcnt_t)qtrx->qt_ino_res; + } + + if (qtrx->qt_rtblk_res) { + if (!locked) { + xfs_dqlock(dqp); + locked = B_TRUE; + } + dqp->q_res_rtbcount -= + (xfs_qcnt_t)qtrx->qt_rtblk_res; + } + if (locked) + xfs_dqunlock(dqp); + + } + qa = tp->t_dqinfo->dqa_grpdquots; + } +} + +STATIC void +xfs_quota_warn( + struct xfs_mount *mp, + struct xfs_dquot *dqp, + int type) +{ + /* no warnings for project quotas - we just return ENOSPC later */ + if (dqp->dq_flags & XFS_DQ_PROJ) + return; + quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA, + be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev, + type); +} + +/* + * This reserves disk blocks and inodes against a dquot. + * Flags indicate if the dquot is to be locked here and also + * if the blk reservation is for RT or regular blocks. + * Sending in XFS_QMOPT_FORCE_RES flag skips the quota check. + */ +STATIC int +xfs_trans_dqresv( + xfs_trans_t *tp, + xfs_mount_t *mp, + xfs_dquot_t *dqp, + long nblks, + long ninos, + uint flags) +{ + xfs_qcnt_t hardlimit; + xfs_qcnt_t softlimit; + time_t timer; + xfs_qwarncnt_t warns; + xfs_qwarncnt_t warnlimit; + xfs_qcnt_t count; + xfs_qcnt_t *resbcountp; + xfs_quotainfo_t *q = mp->m_quotainfo; + + + xfs_dqlock(dqp); + + if (flags & XFS_TRANS_DQ_RES_BLKS) { + hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit); + if (!hardlimit) + hardlimit = q->qi_bhardlimit; + softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit); + if (!softlimit) + softlimit = q->qi_bsoftlimit; + timer = be32_to_cpu(dqp->q_core.d_btimer); + warns = be16_to_cpu(dqp->q_core.d_bwarns); + warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit; + resbcountp = &dqp->q_res_bcount; + } else { + ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS); + hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit); + if (!hardlimit) + hardlimit = q->qi_rtbhardlimit; + softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit); + if (!softlimit) + softlimit = q->qi_rtbsoftlimit; + timer = be32_to_cpu(dqp->q_core.d_rtbtimer); + warns = be16_to_cpu(dqp->q_core.d_rtbwarns); + warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit; + resbcountp = &dqp->q_res_rtbcount; + } + + if ((flags & XFS_QMOPT_FORCE_RES) == 0 && + dqp->q_core.d_id && + ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) || + (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && + (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { + if (nblks > 0) { + /* + * dquot is locked already. See if we'd go over the + * hardlimit or exceed the timelimit if we allocate + * nblks. + */ + if (hardlimit > 0ULL && + hardlimit <= nblks + *resbcountp) { + xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN); + goto error_return; + } + if (softlimit > 0ULL && + softlimit <= nblks + *resbcountp) { + if ((timer != 0 && get_seconds() > timer) || + (warns != 0 && warns >= warnlimit)) { + xfs_quota_warn(mp, dqp, + QUOTA_NL_BSOFTLONGWARN); + goto error_return; + } + + xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN); + } + } + if (ninos > 0) { + count = be64_to_cpu(dqp->q_core.d_icount); + timer = be32_to_cpu(dqp->q_core.d_itimer); + warns = be16_to_cpu(dqp->q_core.d_iwarns); + warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit; + hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit); + if (!hardlimit) + hardlimit = q->qi_ihardlimit; + softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit); + if (!softlimit) + softlimit = q->qi_isoftlimit; + + if (hardlimit > 0ULL && count >= hardlimit) { + xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN); + goto error_return; + } + if (softlimit > 0ULL && count >= softlimit) { + if ((timer != 0 && get_seconds() > timer) || + (warns != 0 && warns >= warnlimit)) { + xfs_quota_warn(mp, dqp, + QUOTA_NL_ISOFTLONGWARN); + goto error_return; + } + xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN); + } + } + } + + /* + * Change the reservation, but not the actual usage. + * Note that q_res_bcount = q_core.d_bcount + resv + */ + (*resbcountp) += (xfs_qcnt_t)nblks; + if (ninos != 0) + dqp->q_res_icount += (xfs_qcnt_t)ninos; + + /* + * note the reservation amt in the trans struct too, + * so that the transaction knows how much was reserved by + * it against this particular dquot. + * We don't do this when we are reserving for a delayed allocation, + * because we don't have the luxury of a transaction envelope then. + */ + if (tp) { + ASSERT(tp->t_dqinfo); + ASSERT(flags & XFS_QMOPT_RESBLK_MASK); + if (nblks != 0) + xfs_trans_mod_dquot(tp, dqp, + flags & XFS_QMOPT_RESBLK_MASK, + nblks); + if (ninos != 0) + xfs_trans_mod_dquot(tp, dqp, + XFS_TRANS_DQ_RES_INOS, + ninos); + } + ASSERT(dqp->q_res_bcount >= be64_to_cpu(dqp->q_core.d_bcount)); + ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount)); + ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount)); + + xfs_dqunlock(dqp); + return 0; + +error_return: + xfs_dqunlock(dqp); + if (flags & XFS_QMOPT_ENOSPC) + return ENOSPC; + return EDQUOT; +} + + +/* + * Given dquot(s), make disk block and/or inode reservations against them. + * The fact that this does the reservation against both the usr and + * grp/prj quotas is important, because this follows a both-or-nothing + * approach. + * + * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown. + * XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT. Used by pquota. + * XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks + * XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks + * dquots are unlocked on return, if they were not locked by caller. + */ +int +xfs_trans_reserve_quota_bydquots( + xfs_trans_t *tp, + xfs_mount_t *mp, + xfs_dquot_t *udqp, + xfs_dquot_t *gdqp, + long nblks, + long ninos, + uint flags) +{ + int resvd = 0, error; + + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + return 0; + + if (tp && tp->t_dqinfo == NULL) + xfs_trans_alloc_dqinfo(tp); + + ASSERT(flags & XFS_QMOPT_RESBLK_MASK); + + if (udqp) { + error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos, + (flags & ~XFS_QMOPT_ENOSPC)); + if (error) + return error; + resvd = 1; + } + + if (gdqp) { + error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags); + if (error) { + /* + * can't do it, so backout previous reservation + */ + if (resvd) { + flags |= XFS_QMOPT_FORCE_RES; + xfs_trans_dqresv(tp, mp, udqp, + -nblks, -ninos, flags); + } + return error; + } + } + + /* + * Didn't change anything critical, so, no need to log + */ + return 0; +} + + +/* + * Lock the dquot and change the reservation if we can. + * This doesn't change the actual usage, just the reservation. + * The inode sent in is locked. + */ +int +xfs_trans_reserve_quota_nblks( + struct xfs_trans *tp, + struct xfs_inode *ip, + long nblks, + long ninos, + uint flags) +{ + struct xfs_mount *mp = ip->i_mount; + + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + return 0; + if (XFS_IS_PQUOTA_ON(mp)) + flags |= XFS_QMOPT_ENOSPC; + + ASSERT(ip->i_ino != mp->m_sb.sb_uquotino); + ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == + XFS_TRANS_DQ_RES_RTBLKS || + (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == + XFS_TRANS_DQ_RES_BLKS); + + /* + * Reserve nblks against these dquots, with trans as the mediator. + */ + return xfs_trans_reserve_quota_bydquots(tp, mp, + ip->i_udquot, ip->i_gdquot, + nblks, ninos, flags); +} + +/* + * This routine is called to allocate a quotaoff log item. + */ +xfs_qoff_logitem_t * +xfs_trans_get_qoff_item( + xfs_trans_t *tp, + xfs_qoff_logitem_t *startqoff, + uint flags) +{ + xfs_qoff_logitem_t *q; + + ASSERT(tp != NULL); + + q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags); + ASSERT(q != NULL); + + /* + * Get a log_item_desc to point at the new item. + */ + xfs_trans_add_item(tp, &q->qql_item); + return q; +} + + +/* + * This is called to mark the quotaoff logitem as needing + * to be logged when the transaction is committed. The logitem must + * already be associated with the given transaction. + */ +void +xfs_trans_log_quotaoff_item( + xfs_trans_t *tp, + xfs_qoff_logitem_t *qlp) +{ + tp->t_flags |= XFS_TRANS_DIRTY; + qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY; +} + +STATIC void +xfs_trans_alloc_dqinfo( + xfs_trans_t *tp) +{ + tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP); +} + +void +xfs_trans_free_dqinfo( + xfs_trans_t *tp) +{ + if (!tp->t_dqinfo) + return; + kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo); + tp->t_dqinfo = NULL; +} diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h new file mode 100644 index 0000000..7c220b4 --- /dev/null +++ b/fs/xfs/xfs_vnode.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_VNODE_H__ +#define __XFS_VNODE_H__ + +#include "xfs_fs.h" + +struct file; +struct xfs_inode; +struct xfs_iomap; +struct attrlist_cursor_kern; + +/* + * Return values for xfs_inactive. A return value of + * VN_INACTIVE_NOCACHE implies that the file system behavior + * has disassociated its state and bhv_desc_t from the vnode. + */ +#define VN_INACTIVE_CACHE 0 +#define VN_INACTIVE_NOCACHE 1 + +/* + * Flags for read/write calls - same values as IRIX + */ +#define IO_ISDIRECT 0x00004 /* bypass page cache */ +#define IO_INVIS 0x00020 /* don't update inode timestamps */ + +#define XFS_IO_FLAGS \ + { IO_ISDIRECT, "DIRECT" }, \ + { IO_INVIS, "INVIS"} + +/* + * Flush/Invalidate options for vop_toss/flush/flushinval_pages. + */ +#define FI_NONE 0 /* none */ +#define FI_REMAPF 1 /* Do a remapf prior to the operation */ +#define FI_REMAPF_LOCKED 2 /* Do a remapf prior to the operation. + Prevent VM access to the pages until + the operation completes. */ + +/* + * Some useful predicates. + */ +#define VN_MAPPED(vp) mapping_mapped(vp->i_mapping) +#define VN_CACHED(vp) (vp->i_mapping->nrpages) +#define VN_DIRTY(vp) mapping_tagged(vp->i_mapping, \ + PAGECACHE_TAG_DIRTY) + + +#endif /* __XFS_VNODE_H__ */ diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c new file mode 100644 index 0000000..87d3e03 --- /dev/null +++ b/fs/xfs/xfs_xattr.c @@ -0,0 +1,241 @@ +/* + * Copyright (C) 2008 Christoph Hellwig. + * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "xfs.h" +#include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_attr.h" +#include "xfs_attr_leaf.h" +#include "xfs_acl.h" +#include "xfs_vnodeops.h" + +#include +#include + + +static int +xfs_xattr_get(struct dentry *dentry, const char *name, + void *value, size_t size, int xflags) +{ + struct xfs_inode *ip = XFS_I(dentry->d_inode); + int error, asize = size; + + if (strcmp(name, "") == 0) + return -EINVAL; + + /* Convert Linux syscall to XFS internal ATTR flags */ + if (!size) { + xflags |= ATTR_KERNOVAL; + value = NULL; + } + + error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags); + if (error) + return error; + return asize; +} + +static int +xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags, int xflags) +{ + struct xfs_inode *ip = XFS_I(dentry->d_inode); + + if (strcmp(name, "") == 0) + return -EINVAL; + + /* Convert Linux syscall to XFS internal ATTR flags */ + if (flags & XATTR_CREATE) + xflags |= ATTR_CREATE; + if (flags & XATTR_REPLACE) + xflags |= ATTR_REPLACE; + + if (!value) + return -xfs_attr_remove(ip, (unsigned char *)name, xflags); + return -xfs_attr_set(ip, (unsigned char *)name, + (void *)value, size, xflags); +} + +static const struct xattr_handler xfs_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .flags = 0, /* no flags implies user namespace */ + .get = xfs_xattr_get, + .set = xfs_xattr_set, +}; + +static const struct xattr_handler xfs_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .flags = ATTR_ROOT, + .get = xfs_xattr_get, + .set = xfs_xattr_set, +}; + +static const struct xattr_handler xfs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .flags = ATTR_SECURE, + .get = xfs_xattr_get, + .set = xfs_xattr_set, +}; + +const struct xattr_handler *xfs_xattr_handlers[] = { + &xfs_xattr_user_handler, + &xfs_xattr_trusted_handler, + &xfs_xattr_security_handler, +#ifdef CONFIG_XFS_POSIX_ACL + &xfs_xattr_acl_access_handler, + &xfs_xattr_acl_default_handler, +#endif + NULL +}; + +static unsigned int xfs_xattr_prefix_len(int flags) +{ + if (flags & XFS_ATTR_SECURE) + return sizeof("security"); + else if (flags & XFS_ATTR_ROOT) + return sizeof("trusted"); + else + return sizeof("user"); +} + +static const char *xfs_xattr_prefix(int flags) +{ + if (flags & XFS_ATTR_SECURE) + return xfs_xattr_security_handler.prefix; + else if (flags & XFS_ATTR_ROOT) + return xfs_xattr_trusted_handler.prefix; + else + return xfs_xattr_user_handler.prefix; +} + +static int +xfs_xattr_put_listent( + struct xfs_attr_list_context *context, + int flags, + unsigned char *name, + int namelen, + int valuelen, + unsigned char *value) +{ + unsigned int prefix_len = xfs_xattr_prefix_len(flags); + char *offset; + int arraytop; + + ASSERT(context->count >= 0); + + /* + * Only show root namespace entries if we are actually allowed to + * see them. + */ + if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN)) + return 0; + + arraytop = context->count + prefix_len + namelen + 1; + if (arraytop > context->firstu) { + context->count = -1; /* insufficient space */ + return 1; + } + offset = (char *)context->alist + context->count; + strncpy(offset, xfs_xattr_prefix(flags), prefix_len); + offset += prefix_len; + strncpy(offset, (char *)name, namelen); /* real name */ + offset += namelen; + *offset = '\0'; + context->count += prefix_len + namelen + 1; + return 0; +} + +static int +xfs_xattr_put_listent_sizes( + struct xfs_attr_list_context *context, + int flags, + unsigned char *name, + int namelen, + int valuelen, + unsigned char *value) +{ + context->count += xfs_xattr_prefix_len(flags) + namelen + 1; + return 0; +} + +static int +list_one_attr(const char *name, const size_t len, void *data, + size_t size, ssize_t *result) +{ + char *p = data + *result; + + *result += len; + if (!size) + return 0; + if (*result > size) + return -ERANGE; + + strcpy(p, name); + return 0; +} + +ssize_t +xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) +{ + struct xfs_attr_list_context context; + struct attrlist_cursor_kern cursor = { 0 }; + struct inode *inode = dentry->d_inode; + int error; + + /* + * First read the regular on-disk attributes. + */ + memset(&context, 0, sizeof(context)); + context.dp = XFS_I(inode); + context.cursor = &cursor; + context.resynch = 1; + context.alist = data; + context.bufsize = size; + context.firstu = context.bufsize; + + if (size) + context.put_listent = xfs_xattr_put_listent; + else + context.put_listent = xfs_xattr_put_listent_sizes; + + xfs_attr_list_int(&context); + if (context.count < 0) + return -ERANGE; + + /* + * Then add the two synthetic ACL attributes. + */ + if (posix_acl_access_exists(inode)) { + error = list_one_attr(POSIX_ACL_XATTR_ACCESS, + strlen(POSIX_ACL_XATTR_ACCESS) + 1, + data, size, &context.count); + if (error) + return error; + } + + if (posix_acl_default_exists(inode)) { + error = list_one_attr(POSIX_ACL_XATTR_DEFAULT, + strlen(POSIX_ACL_XATTR_DEFAULT) + 1, + data, size, &context.count); + if (error) + return error; + } + + return context.count; +} diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 3b8e028..e8bffbe 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -1,6 +1,6 @@ #include #include -#include "../fs/xfs/linux-2.6/xfs_sysctl.h" +#include "../fs/xfs/xfs_sysctl.h" #include #include #include diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index 4e4932a..362da65 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c @@ -1,6 +1,6 @@ #include #include -#include "../fs/xfs/linux-2.6/xfs_sysctl.h" +#include "../fs/xfs/xfs_sysctl.h" #include #include #include -- cgit v1.1 From 63d635b21c00069b5ade7640bcbe8ab912dc65d1 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 2 Jun 2011 04:29:23 +0000 Subject: ixgbe: Fix FCOE memory leak for DDP packets This patch is meant to fix a memory leak found via code review for FCOE. Specifically on DDP flows the SKBs were being dropped without being recycled, freed, or given to the stack. Signed-off-by: Alexander Duyck Tested-by: Ross Brattain Signed-off-by: Jeff Kirsher --- drivers/net/ixgbe/ixgbe_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index e86297b..2279039 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -1459,8 +1459,10 @@ static void ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, if (ixgbe_rx_is_fcoe(adapter, rx_desc)) { ddp_bytes = ixgbe_fcoe_ddp(adapter, rx_desc, skb, staterr); - if (!ddp_bytes) + if (!ddp_bytes) { + dev_kfree_skb_any(skb); goto next_desc; + } } #endif /* IXGBE_FCOE */ ixgbe_receive_skb(q_vector, skb, staterr, rx_ring, rx_desc); -- cgit v1.1 From 1d2101a712b3b7281a19ff6d7bfc16c2ce9d3998 Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Fri, 22 Jul 2011 06:21:56 +0000 Subject: e1000e: Spurious interrupts & dropped packets with 82577/8/9 in half-duplex On 82577/8/9 in half-duplex when a received packet is passed from the PHY to the MAC, if too many preamble octects are stripped from the packet before arriving at the MAC, it can be misintrepeted as an in-band message rather than an actual frame. For example, if the frame contents resembled an interrupt request in-band message, it would trigger a false interrupt. In most cases, the packet is just dropped. By reducing the number of preamble octets stripped from the beginning of the frame when passing it from the PHY to the MAC, the MAC will interpret the frame properly. An additional uses of the magic PHY_REG(770, 16) have been updated with a define introduced with this patch. Signed-off-by: Bruce Allan Tested-by: Jeff Pieper Signed-off-by: Jeff Kirsher --- drivers/net/e1000e/ich8lan.c | 41 +++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c index 4e36978..7525e37 100644 --- a/drivers/net/e1000e/ich8lan.c +++ b/drivers/net/e1000e/ich8lan.c @@ -163,6 +163,11 @@ #define HV_KMRN_MODE_CTRL PHY_REG(769, 16) #define HV_KMRN_MDIO_SLOW 0x0400 +/* KMRN FIFO Control and Status */ +#define HV_KMRN_FIFO_CTRLSTA PHY_REG(770, 16) +#define HV_KMRN_FIFO_CTRLSTA_PREAMBLE_MASK 0x7000 +#define HV_KMRN_FIFO_CTRLSTA_PREAMBLE_SHIFT 12 + /* ICH GbE Flash Hardware Sequencing Flash Status Register bit breakdown */ /* Offset 04h HSFSTS */ union ich8_hws_flash_status { @@ -657,6 +662,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) struct e1000_mac_info *mac = &hw->mac; s32 ret_val; bool link; + u16 phy_reg; /* * We only want to go out to the PHY registers to see if Auto-Neg @@ -689,16 +695,35 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) mac->get_link_status = false; - if (hw->phy.type == e1000_phy_82578) { - ret_val = e1000_link_stall_workaround_hv(hw); - if (ret_val) - goto out; - } - - if (hw->mac.type == e1000_pch2lan) { + switch (hw->mac.type) { + case e1000_pch2lan: ret_val = e1000_k1_workaround_lv(hw); if (ret_val) goto out; + /* fall-thru */ + case e1000_pchlan: + if (hw->phy.type == e1000_phy_82578) { + ret_val = e1000_link_stall_workaround_hv(hw); + if (ret_val) + goto out; + } + + /* + * Workaround for PCHx parts in half-duplex: + * Set the number of preambles removed from the packet + * when it is passed from the PHY to the MAC to prevent + * the MAC from misinterpreting the packet type. + */ + e1e_rphy(hw, HV_KMRN_FIFO_CTRLSTA, &phy_reg); + phy_reg &= ~HV_KMRN_FIFO_CTRLSTA_PREAMBLE_MASK; + + if ((er32(STATUS) & E1000_STATUS_FD) != E1000_STATUS_FD) + phy_reg |= (1 << HV_KMRN_FIFO_CTRLSTA_PREAMBLE_SHIFT); + + e1e_wphy(hw, HV_KMRN_FIFO_CTRLSTA, phy_reg); + break; + default: + break; } /* @@ -1355,7 +1380,7 @@ static s32 e1000_hv_phy_workarounds_ich8lan(struct e1000_hw *hw) return ret_val; /* Preamble tuning for SSC */ - ret_val = e1e_wphy(hw, PHY_REG(770, 16), 0xA204); + ret_val = e1e_wphy(hw, HV_KMRN_FIFO_CTRLSTA, 0xA204); if (ret_val) return ret_val; } -- cgit v1.1 From 0ed013e28fe853244f4972cf18d8e2bd62eeb8fc Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Fri, 29 Jul 2011 05:52:56 +0000 Subject: e1000e: workaround for packet drop on 82579 at 100Mbps The MAC can drop short packets when the PHY detects noise on the line at 100Mbps due to a timing issue. Workaround the issue by increasing the PLL counter so the PHY properly recognizes the synchronization pattern from the MAC. Signed-off-by: Bruce Allan Tested-by: Jeff Pieper Signed-off-by: Jeff Kirsher --- drivers/net/e1000e/ich8lan.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c index 7525e37..46a5277 100644 --- a/drivers/net/e1000e/ich8lan.c +++ b/drivers/net/e1000e/ich8lan.c @@ -137,8 +137,9 @@ #define HV_PM_CTRL PHY_REG(770, 17) /* PHY Low Power Idle Control */ -#define I82579_LPI_CTRL PHY_REG(772, 20) -#define I82579_LPI_CTRL_ENABLE_MASK 0x6000 +#define I82579_LPI_CTRL PHY_REG(772, 20) +#define I82579_LPI_CTRL_ENABLE_MASK 0x6000 +#define I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT 0x80 /* EMI Registers */ #define I82579_EMI_ADDR 0x10 @@ -1670,6 +1671,7 @@ static s32 e1000_k1_workaround_lv(struct e1000_hw *hw) s32 ret_val = 0; u16 status_reg = 0; u32 mac_reg; + u16 phy_reg; if (hw->mac.type != e1000_pch2lan) goto out; @@ -1684,12 +1686,19 @@ static s32 e1000_k1_workaround_lv(struct e1000_hw *hw) mac_reg = er32(FEXTNVM4); mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK; - if (status_reg & HV_M_STATUS_SPEED_1000) + ret_val = e1e_rphy(hw, I82579_LPI_CTRL, &phy_reg); + if (ret_val) + goto out; + + if (status_reg & HV_M_STATUS_SPEED_1000) { mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_8USEC; - else + phy_reg &= ~I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT; + } else { mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_16USEC; - + phy_reg |= I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT; + } ew32(FEXTNVM4, mac_reg); + ret_val = e1e_wphy(hw, I82579_LPI_CTRL, phy_reg); } out: -- cgit v1.1 From c6e7f51e73c1bc6044bce989ec503ef2e4758d55 Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Fri, 29 Jul 2011 05:53:02 +0000 Subject: e1000e: workaround invalid Tx/Rx tail descriptor register write When the Manageability Engine (ME) is enabled on 82579, it periodically accesses some MAC CSR registers. There is an arbiter in hardware which prevents simultaneous access of these registers by the host software, i.e. the driver. There is a hardware bug in the aribter that signals a host access of the registers later than it actually happens. A write of the Transmit or Receive Descriptor Tail register could result in an incorrect value if the driver and ME perform simultaneous accesses which could result in an access to an invalid memory address. This would return an Unsupported Request which could hang the hardware. Workaround the issue by checking the FWSM register bit24 which is set by ME before it accesses the MAC CSR registers. Signed-off-by: Bruce Allan Tested-by: Jeff Pieper Signed-off-by: Jeff Kirsher --- drivers/net/e1000e/e1000.h | 4 +++ drivers/net/e1000e/ich8lan.c | 5 +++ drivers/net/e1000e/netdev.c | 80 +++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 85 insertions(+), 4 deletions(-) diff --git a/drivers/net/e1000e/e1000.h b/drivers/net/e1000e/e1000.h index 35916f4..8533ad7 100644 --- a/drivers/net/e1000e/e1000.h +++ b/drivers/net/e1000e/e1000.h @@ -155,6 +155,9 @@ struct e1000_info; #define HV_M_STATUS_SPEED_1000 0x0200 #define HV_M_STATUS_LINK_UP 0x0040 +#define E1000_ICH_FWSM_PCIM2PCI 0x01000000 /* ME PCIm-to-PCI active */ +#define E1000_ICH_FWSM_PCIM2PCI_COUNT 2000 + /* Time to wait before putting the device into D3 if there's no link (in ms). */ #define LINK_TIMEOUT 100 @@ -454,6 +457,7 @@ struct e1000_info { #define FLAG2_DISABLE_AIM (1 << 8) #define FLAG2_CHECK_PHY_HANG (1 << 9) #define FLAG2_NO_DISABLE_RX (1 << 10) +#define FLAG2_PCIM2PCI_ARBITER_WA (1 << 11) #define E1000_RX_DESC_PS(R, i) \ (&(((union e1000_rx_desc_packet_split *)((R).desc))[i])) diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c index 46a5277..54add27 100644 --- a/drivers/net/e1000e/ich8lan.c +++ b/drivers/net/e1000e/ich8lan.c @@ -814,6 +814,11 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) (adapter->hw.phy.type == e1000_phy_igp_3)) adapter->flags |= FLAG_LSC_GIG_SPEED_DROP; + /* Enable workaround for 82579 w/ ME enabled */ + if ((adapter->hw.mac.type == e1000_pch2lan) && + (er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) + adapter->flags2 |= FLAG2_PCIM2PCI_ARBITER_WA; + /* Disable EEE by default until IEEE802.3az spec is finalized */ if (adapter->flags2 & FLAG2_HAS_EEE) adapter->hw.dev_spec.ich8lan.eee_disable = true; diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 362f703..2198e61 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -519,6 +519,63 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, } /** + * e1000e_update_tail_wa - helper function for e1000e_update_[rt]dt_wa() + * @hw: pointer to the HW structure + * @tail: address of tail descriptor register + * @i: value to write to tail descriptor register + * + * When updating the tail register, the ME could be accessing Host CSR + * registers at the same time. Normally, this is handled in h/w by an + * arbiter but on some parts there is a bug that acknowledges Host accesses + * later than it should which could result in the descriptor register to + * have an incorrect value. Workaround this by checking the FWSM register + * which has bit 24 set while ME is accessing Host CSR registers, wait + * if it is set and try again a number of times. + **/ +static inline s32 e1000e_update_tail_wa(struct e1000_hw *hw, u8 __iomem * tail, + unsigned int i) +{ + unsigned int j = 0; + + while ((j++ < E1000_ICH_FWSM_PCIM2PCI_COUNT) && + (er32(FWSM) & E1000_ICH_FWSM_PCIM2PCI)) + udelay(50); + + writel(i, tail); + + if ((j == E1000_ICH_FWSM_PCIM2PCI_COUNT) && (i != readl(tail))) + return E1000_ERR_SWFW_SYNC; + + return 0; +} + +static void e1000e_update_rdt_wa(struct e1000_adapter *adapter, unsigned int i) +{ + u8 __iomem *tail = (adapter->hw.hw_addr + adapter->rx_ring->tail); + struct e1000_hw *hw = &adapter->hw; + + if (e1000e_update_tail_wa(hw, tail, i)) { + u32 rctl = er32(RCTL); + ew32(RCTL, rctl & ~E1000_RCTL_EN); + e_err("ME firmware caused invalid RDT - resetting\n"); + schedule_work(&adapter->reset_task); + } +} + +static void e1000e_update_tdt_wa(struct e1000_adapter *adapter, unsigned int i) +{ + u8 __iomem *tail = (adapter->hw.hw_addr + adapter->tx_ring->tail); + struct e1000_hw *hw = &adapter->hw; + + if (e1000e_update_tail_wa(hw, tail, i)) { + u32 tctl = er32(TCTL); + ew32(TCTL, tctl & ~E1000_TCTL_EN); + e_err("ME firmware caused invalid TDT - resetting\n"); + schedule_work(&adapter->reset_task); + } +} + +/** * e1000_alloc_rx_buffers - Replace used receive buffers; legacy & extended * @adapter: address of board private structure **/ @@ -573,7 +630,10 @@ map_skb: * such as IA-64). */ wmb(); - writel(i, adapter->hw.hw_addr + rx_ring->tail); + if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA) + e1000e_update_rdt_wa(adapter, i); + else + writel(i, adapter->hw.hw_addr + rx_ring->tail); } i++; if (i == rx_ring->count) @@ -673,7 +733,11 @@ static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, * such as IA-64). */ wmb(); - writel(i << 1, adapter->hw.hw_addr + rx_ring->tail); + if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA) + e1000e_update_rdt_wa(adapter, i << 1); + else + writel(i << 1, + adapter->hw.hw_addr + rx_ring->tail); } i++; @@ -756,7 +820,10 @@ check_page: * applicable for weak-ordered memory model archs, * such as IA-64). */ wmb(); - writel(i, adapter->hw.hw_addr + rx_ring->tail); + if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA) + e1000e_update_rdt_wa(adapter, i); + else + writel(i, adapter->hw.hw_addr + rx_ring->tail); } } @@ -4689,7 +4756,12 @@ static void e1000_tx_queue(struct e1000_adapter *adapter, wmb(); tx_ring->next_to_use = i; - writel(i, adapter->hw.hw_addr + tx_ring->tail); + + if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA) + e1000e_update_tdt_wa(adapter, i); + else + writel(i, adapter->hw.hw_addr + tx_ring->tail); + /* * we need this if more than one processor can write to our tail * at a time, it synchronizes IO on IA64/Altix systems -- cgit v1.1 From aaff12039ffd812d0c8bbff50b87b6f1f09bec3e Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 7 Aug 2011 15:20:18 +0200 Subject: firewire: core: handle ack_busy when fetching the Config ROM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some older Panasonic made camcorders (Panasonic AG-EZ30 and NV-DX110, Grundig Scenos DLC 2000) reject requests with ack_busy_X if a request is sent immediately after they sent a response to a prior transaction. This causes firewire-core to fail probing of the camcorder with "giving up on config rom for node id ...". Consequently, programs like kino or dvgrab are unaware of the presence of a camcorder. Such transaction failures happen also with the ieee1394 driver stack (of the 2.4...2.6 kernel series until 2.6.36 inclusive) but with a lower likelihood, such that kino or dvgrab are generally able to use these camcorders via the older driver stack. The cause for firewire-ohci's or firewire-core's worse behavior is not yet known. Gap count optimization in firewire-core is not the cause. Perhaps the slightly higher latency of transaction completion in the older stack plays a role. (ieee1394: AR-resp DMA context tasklet -> packet completion ktread -> user process; firewire-core: tasklet -> user process.) This change introduces retries and delays after ack_busy_X into firewire-core's Config ROM reader, such that at least firewire-core's probing and /dev/fw* creation are successful. This still leaves the problem that userland processes are facing transaction failures. gscanbus's built-in retry routines deal with them successfully, but neither kino's nor dvgrab's do ever succeed. But at least DV capture with "dvgrab -noavc -card 0" works now. Live video preview in kino works too, but not actual capture. One way to prevent Configuration ROM reading failures in application programs is to modify libraw1394 to synthesize read responses by means of firewire-core's Configuration ROM cache. This would only leave CMP and FCP transaction failures as a potential problem source for applications. Reported-and-tested-by: Thomas Seilund Reported-and-tested-by: René Fritz Signed-off-by: Stefan Richter --- drivers/firewire/core-device.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 95a4714..9f661e0 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -455,15 +455,20 @@ static struct device_attribute fw_device_attributes[] = { static int read_rom(struct fw_device *device, int generation, int index, u32 *data) { - int rcode; + u64 offset = (CSR_REGISTER_BASE | CSR_CONFIG_ROM) + index * 4; + int i, rcode; /* device->node_id, accessed below, must not be older than generation */ smp_rmb(); - rcode = fw_run_transaction(device->card, TCODE_READ_QUADLET_REQUEST, - device->node_id, generation, device->max_speed, - (CSR_REGISTER_BASE | CSR_CONFIG_ROM) + index * 4, - data, 4); + for (i = 10; i < 100; i += 10) { + rcode = fw_run_transaction(device->card, + TCODE_READ_QUADLET_REQUEST, device->node_id, + generation, device->max_speed, offset, data, 4); + if (rcode != RCODE_BUSY) + break; + msleep(i); + } be32_to_cpus(data); return rcode; -- cgit v1.1 From 441c850857148935babe000fc2ba1455fe54a6a9 Mon Sep 17 00:00:00 2001 From: Curt Wohlgemuth Date: Sat, 13 Aug 2011 11:25:18 -0400 Subject: ext4: Fix ext4_should_writeback_data() for no-journal mode ext4_should_writeback_data() had an incorrect sequence of tests to determine if it should return 0 or 1: in particular, even in no-journal mode, 0 was being returned for a non-regular-file inode. This meant that, in non-journal mode, we would use ext4_journalled_aops for directories, symlinks, and other non-regular files. However, calling journalled aop callbacks when there is no valid handle, can cause problems. This would cause a kernel crash with Jan Kara's commit 2d859db3e4 ("ext4: fix data corruption in inodes with journalled data"), because we now dereference 'handle' in ext4_journalled_write_end(). I also added BUG_ONs to check for a valid handle in the obviously journal-only aops callbacks. I tested this running xfstests with a scratch device in these modes: - no-journal - data=ordered - data=writeback - data=journal All work fine; the data=journal run has many failures and a crash in xfstests 074, but this is no different from a vanilla kernel. Signed-off-by: Curt Wohlgemuth Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/ext4_jbd2.h | 4 ++-- fs/ext4/inode.c | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index bb85757..5802fa1 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -289,10 +289,10 @@ static inline int ext4_should_order_data(struct inode *inode) static inline int ext4_should_writeback_data(struct inode *inode) { - if (!S_ISREG(inode->i_mode)) - return 0; if (EXT4_JOURNAL(inode) == NULL) return 1; + if (!S_ISREG(inode->i_mode)) + return 0; if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) return 0; if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d47264c..ad3a7ca 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -983,6 +983,8 @@ static int ext4_journalled_write_end(struct file *file, from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; + BUG_ON(!ext4_handle_valid(handle)); + if (copied < len) { if (!PageUptodate(page)) copied = 0; @@ -1699,6 +1701,8 @@ static int __ext4_journalled_writepage(struct page *page, goto out; } + BUG_ON(!ext4_handle_valid(handle)); + ret = walk_page_buffers(handle, page_bufs, 0, len, NULL, do_journal_get_write_access); -- cgit v1.1 From 2581fdc810889fdea97689cb62481201d579c796 Mon Sep 17 00:00:00 2001 From: Jiaying Zhang Date: Sat, 13 Aug 2011 12:17:13 -0400 Subject: ext4: call ext4_ioend_wait and ext4_flush_completed_IO in ext4_evict_inode Flush inode's i_completed_io_list before calling ext4_io_wait to prevent the following deadlock scenario: A page fault happens while some process is writing inode A. During page fault, shrink_icache_memory is called that in turn evicts another inode B. Inode B has some pending io_end work so it calls ext4_ioend_wait() that waits for inode B's i_ioend_count to become zero. However, inode B's ioend work was queued behind some of inode A's ioend work on the same cpu's ext4-dio-unwritten workqueue. As the ext4-dio-unwritten thread on that cpu is processing inode A's ioend work, it tries to grab inode A's i_mutex lock. Since the i_mutex lock of inode A is still hold before the page fault happened, we enter a deadlock. Also moves ext4_flush_completed_IO and ext4_ioend_wait from ext4_destroy_inode() to ext4_evict_inode(). During inode deleteion, ext4_evict_inode() is called before ext4_destroy_inode() and in ext4_evict_inode(), we may call ext4_truncate() without holding i_mutex lock. As a result, there is a race between flush_completed_IO that is called from ext4_ext_truncate() and ext4_end_io_work, which may cause corruption on an io_end structure. This change moves ext4_flush_completed_IO and ext4_ioend_wait from ext4_destroy_inode() to ext4_evict_inode() to resolve the race between ext4_truncate() and ext4_end_io_work during inode deletion. Signed-off-by: Jiaying Zhang Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/inode.c | 6 ++++++ fs/ext4/super.c | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ad3a7ca..7dd6981 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -120,6 +120,12 @@ void ext4_evict_inode(struct inode *inode) int err; trace_ext4_evict_inode(inode); + + mutex_lock(&inode->i_mutex); + ext4_flush_completed_IO(inode); + mutex_unlock(&inode->i_mutex); + ext4_ioend_wait(inode); + if (inode->i_nlink) { /* * When journalling data dirty buffers are tracked only in the diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4687fea..44d0c8d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -919,7 +919,6 @@ static void ext4_i_callback(struct rcu_head *head) static void ext4_destroy_inode(struct inode *inode) { - ext4_ioend_wait(inode); if (!list_empty(&(EXT4_I(inode)->i_orphan))) { ext4_msg(inode->i_sb, KERN_ERR, "Inode %lu (%p): orphan list check failed!", -- cgit v1.1 From 32c80b32c053dc52712dedac5e4d0aa7c93fc353 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sat, 13 Aug 2011 12:30:59 -0400 Subject: ext4: Resolve the hang of direct i/o read in handling EXT4_IO_END_UNWRITTEN. EXT4_IO_END_UNWRITTEN flag set and the increase of i_aiodio_unwritten should be done simultaneously since ext4_end_io_nolock always clear the flag and decrease the counter in the same time. We don't increase i_aiodio_unwritten when setting EXT4_IO_END_UNWRITTEN so it will go nagative and causes some process to wait forever. Part of the patch came from Eric in his e-mail, but it doesn't fix the problem met by Michael actually. http://marc.info/?l=linux-ext4&m=131316851417460&w=2 Reported-and-Tested-by: Michael Tokarev Signed-off-by: Eric Sandeen Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/inode.c | 9 ++++++++- fs/ext4/page-io.c | 6 ++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7dd6981..762e803 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2678,8 +2678,15 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) goto out; } - io_end->flag = EXT4_IO_END_UNWRITTEN; + /* + * It may be over-defensive here to check EXT4_IO_END_UNWRITTEN now, + * but being more careful is always safe for the future change. + */ inode = io_end->inode; + if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { + io_end->flag |= EXT4_IO_END_UNWRITTEN; + atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); + } /* Add the io_end to per-inode completed io list*/ spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 430c401..78839af 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -334,8 +334,10 @@ submit_and_retry: if ((io_end->num_io_pages >= MAX_IO_PAGES) && (io_end->pages[io_end->num_io_pages-1] != io_page)) goto submit_and_retry; - if (buffer_uninit(bh)) - io->io_end->flag |= EXT4_IO_END_UNWRITTEN; + if (buffer_uninit(bh) && !(io_end->flag & EXT4_IO_END_UNWRITTEN)) { + io_end->flag |= EXT4_IO_END_UNWRITTEN; + atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); + } io->io_end->size += bh->b_size; io->io_next_block++; ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); -- cgit v1.1 From 9dd75f1f1a02d656a11a7b9b9e6c2759b9c1e946 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 13 Aug 2011 12:58:21 -0400 Subject: ext4: fix nomblk_io_submit option so it correctly converts uninit blocks Bug discovered by Jan Kara: Finally, commit 1449032be17abb69116dbc393f67ceb8bd034f92 returned back the old IO submission code but apparently it forgot to return the old handling of uninitialized buffers so we unconditionnaly call block_write_full_page() without specifying end_io function. So AFAICS we never convert unwritten extents to written in some cases. For example when I mount the fs as: mount -t ext4 -o nomblk_io_submit,dioread_nolock /dev/ubdb /mnt and do int fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, 0600); char buf[1024]; memset(buf, 'a', sizeof(buf)); fallocate(fd, 0, 0, 16384); write(fd, buf, sizeof(buf)); I get a file full of zeros (after remounting the filesystem so that pagecache is dropped) instead of seeing the first KB contain 'a's. Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 762e803..c4da98a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1291,7 +1291,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT)) err = ext4_bio_write_page(&io_submit, page, len, mpd->wbc); - else + else if (buffer_uninit(page_bufs)) { + ext4_set_bh_endio(page_bufs, inode); + err = block_write_full_page_endio(page, + noalloc_get_block_write, + mpd->wbc, ext4_end_io_buffer_write); + } else err = block_write_full_page(page, noalloc_get_block_write, mpd->wbc); -- cgit v1.1 From 4eb60d869fdad7acd098b53bfd1863c311d8933d Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sat, 13 Aug 2011 09:02:43 -0700 Subject: Revert "iwlagn: sysfs couldn't find the priv pointer" This reverts commit cc1a93e68f6c0d736b771f0746e8e4186f483fdc. This fix introduced a bug: bad pointer in unload. Signed-off-by: Emmanuel Grumbach Signed-off-by: Wey-Yi Guy Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-pci.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-pci.c b/drivers/net/wireless/iwlwifi/iwl-pci.c index 69d4ec4..fb7e436 100644 --- a/drivers/net/wireless/iwlwifi/iwl-pci.c +++ b/drivers/net/wireless/iwlwifi/iwl-pci.c @@ -134,7 +134,6 @@ static void iwl_pci_apm_config(struct iwl_bus *bus) static void iwl_pci_set_drv_data(struct iwl_bus *bus, void *drv_data) { bus->drv_data = drv_data; - pci_set_drvdata(IWL_BUS_GET_PCI_DEV(bus), drv_data); } static void iwl_pci_get_hw_id(struct iwl_bus *bus, char buf[], @@ -455,6 +454,8 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_write_config_word(pdev, PCI_COMMAND, pci_cmd); } + pci_set_drvdata(pdev, bus); + bus->dev = &pdev->dev; bus->irq = pdev->irq; bus->ops = &pci_ops; @@ -493,12 +494,11 @@ static void iwl_pci_down(struct iwl_bus *bus) static void __devexit iwl_pci_remove(struct pci_dev *pdev) { - struct iwl_priv *priv = pci_get_drvdata(pdev); - void *bus_specific = priv->bus->bus_specific; + struct iwl_bus *bus = pci_get_drvdata(pdev); - iwl_remove(priv); + iwl_remove(bus->drv_data); - iwl_pci_down(bus_specific); + iwl_pci_down(bus); } #ifdef CONFIG_PM @@ -506,20 +506,20 @@ static void __devexit iwl_pci_remove(struct pci_dev *pdev) static int iwl_pci_suspend(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); - struct iwl_priv *priv = pci_get_drvdata(pdev); + struct iwl_bus *bus = pci_get_drvdata(pdev); /* Before you put code here, think about WoWLAN. You cannot check here * whether WoWLAN is enabled or not, and your code will run even if * WoWLAN is enabled - don't kill the NIC, someone may need it in Sx. */ - return iwl_suspend(priv); + return iwl_suspend(bus->drv_data); } static int iwl_pci_resume(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); - struct iwl_priv *priv = pci_get_drvdata(pdev); + struct iwl_bus *bus = pci_get_drvdata(pdev); /* Before you put code here, think about WoWLAN. You cannot check here * whether WoWLAN is enabled or not, and your code will run even if @@ -532,7 +532,7 @@ static int iwl_pci_resume(struct device *device) */ pci_write_config_byte(pdev, PCI_CFG_RETRY_TIMEOUT, 0x00); - return iwl_resume(priv); + return iwl_resume(bus->drv_data); } static SIMPLE_DEV_PM_OPS(iwl_dev_pm_ops, iwl_pci_suspend, iwl_pci_resume); -- cgit v1.1 From 16a9d06c753abc44f66f88e03bbecb3f1e45d71b Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sat, 13 Aug 2011 09:02:44 -0700 Subject: iwlagn: sysfs couldn't find the priv pointer This bug has been introduced by: d593411084a56124aa9d80aafa15db8463b2d8f7 Author: Emmanuel Grumbach Date: Mon Jul 11 10:48:51 2011 +0300 iwlagn: simplify the bus architecture Revert part of the buggy patch: dev_get_drvdata will now return iwl_priv as it did before the patch. Signed-off-by: Emmanuel Grumbach Signed-off-by: Wey-Yi Guy Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-pci.c | 39 +++++++++++++++------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-pci.c b/drivers/net/wireless/iwlwifi/iwl-pci.c index fb7e436..2fdbffa 100644 --- a/drivers/net/wireless/iwlwifi/iwl-pci.c +++ b/drivers/net/wireless/iwlwifi/iwl-pci.c @@ -134,6 +134,7 @@ static void iwl_pci_apm_config(struct iwl_bus *bus) static void iwl_pci_set_drv_data(struct iwl_bus *bus, void *drv_data) { bus->drv_data = drv_data; + pci_set_drvdata(IWL_BUS_GET_PCI_DEV(bus), drv_data); } static void iwl_pci_get_hw_id(struct iwl_bus *bus, char buf[], @@ -454,8 +455,6 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_write_config_word(pdev, PCI_COMMAND, pci_cmd); } - pci_set_drvdata(pdev, bus); - bus->dev = &pdev->dev; bus->irq = pdev->irq; bus->ops = &pci_ops; @@ -479,26 +478,22 @@ out_no_pci: return err; } -static void iwl_pci_down(struct iwl_bus *bus) -{ - struct iwl_pci_bus *pci_bus = (struct iwl_pci_bus *) bus->bus_specific; - - pci_disable_msi(pci_bus->pci_dev); - pci_iounmap(pci_bus->pci_dev, pci_bus->hw_base); - pci_release_regions(pci_bus->pci_dev); - pci_disable_device(pci_bus->pci_dev); - pci_set_drvdata(pci_bus->pci_dev, NULL); - - kfree(bus); -} - static void __devexit iwl_pci_remove(struct pci_dev *pdev) { - struct iwl_bus *bus = pci_get_drvdata(pdev); + struct iwl_priv *priv = pci_get_drvdata(pdev); + struct iwl_bus *bus = priv->bus; + struct iwl_pci_bus *pci_bus = IWL_BUS_GET_PCI_BUS(bus); + struct pci_dev *pci_dev = IWL_BUS_GET_PCI_DEV(bus); - iwl_remove(bus->drv_data); + iwl_remove(priv); - iwl_pci_down(bus); + pci_disable_msi(pci_dev); + pci_iounmap(pci_dev, pci_bus->hw_base); + pci_release_regions(pci_dev); + pci_disable_device(pci_dev); + pci_set_drvdata(pci_dev, NULL); + + kfree(bus); } #ifdef CONFIG_PM @@ -506,20 +501,20 @@ static void __devexit iwl_pci_remove(struct pci_dev *pdev) static int iwl_pci_suspend(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); - struct iwl_bus *bus = pci_get_drvdata(pdev); + struct iwl_priv *priv = pci_get_drvdata(pdev); /* Before you put code here, think about WoWLAN. You cannot check here * whether WoWLAN is enabled or not, and your code will run even if * WoWLAN is enabled - don't kill the NIC, someone may need it in Sx. */ - return iwl_suspend(bus->drv_data); + return iwl_suspend(priv); } static int iwl_pci_resume(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); - struct iwl_bus *bus = pci_get_drvdata(pdev); + struct iwl_priv *priv = pci_get_drvdata(pdev); /* Before you put code here, think about WoWLAN. You cannot check here * whether WoWLAN is enabled or not, and your code will run even if @@ -532,7 +527,7 @@ static int iwl_pci_resume(struct device *device) */ pci_write_config_byte(pdev, PCI_CFG_RETRY_TIMEOUT, 0x00); - return iwl_resume(bus->drv_data); + return iwl_resume(priv); } static SIMPLE_DEV_PM_OPS(iwl_dev_pm_ops, iwl_pci_suspend, iwl_pci_resume); -- cgit v1.1 From f6957f88e59df5008f7b2169400be657f81cdb80 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 7 Aug 2011 23:15:47 +0000 Subject: vmxnet3: Don't enable vlan filters in promiscuous mode. The vmxnet3 driver enables vlan filters if filtering is enabled for any vlan. In promiscuous mode the filter table is cleared to in order to disable filtering. However, if a vlan device is subsequently created that vlan will be added to the filter, re-engaging it. As a result, not only do we not see all the vlans in promiscuous mode, we don't even see vlans for which a filter was previously created. CC: Scott J. Goldman CC: Shreyas Bhatewara CC: VMware PV-Drivers Signed-off-by: Jesse Gross Signed-off-by: Shreyas N Bhatewara Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 1cbacb3..0959583 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1929,14 +1929,17 @@ static void vmxnet3_vlan_rx_add_vid(struct net_device *netdev, u16 vid) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); - u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable; - unsigned long flags; - VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid); - spin_lock_irqsave(&adapter->cmd_lock, flags); - VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, - VMXNET3_CMD_UPDATE_VLAN_FILTERS); - spin_unlock_irqrestore(&adapter->cmd_lock, flags); + if (!(netdev->flags & IFF_PROMISC)) { + u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable; + unsigned long flags; + + VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid); + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_UPDATE_VLAN_FILTERS); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + } set_bit(vid, adapter->active_vlans); } @@ -1946,14 +1949,17 @@ static void vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); - u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable; - unsigned long flags; - VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid); - spin_lock_irqsave(&adapter->cmd_lock, flags); - VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, - VMXNET3_CMD_UPDATE_VLAN_FILTERS); - spin_unlock_irqrestore(&adapter->cmd_lock, flags); + if (!(netdev->flags & IFF_PROMISC)) { + u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable; + unsigned long flags; + + VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid); + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_UPDATE_VLAN_FILTERS); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + } clear_bit(vid, adapter->active_vlans); } -- cgit v1.1 From 75bc8ef528f7c4ea7e80384c5593487b6b3b535e Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Mon, 8 Aug 2011 02:34:07 +0000 Subject: usbnet/cdc_ncm: Don't use stack variables for DMA The cdc_ncm driver still has a few places where stack variables are passed to the cdc_ncm_do_request function. This triggers a stack trace in lib/dma-debug.c if the CONFIG_DEBUG_DMA_API option is set. Adjust these calls to pass parameters that have been allocated with kzalloc. Signed-off-by: Josh Boyer Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 47 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index a03336e..f06fb78 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -228,23 +228,40 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) if (ctx->rx_max != le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize)) { if (flags & USB_CDC_NCM_NCAP_NTB_INPUT_SIZE) { - struct usb_cdc_ncm_ndp_input_size ndp_in_sz; + struct usb_cdc_ncm_ndp_input_size *ndp_in_sz; + + ndp_in_sz = kzalloc(sizeof(*ndp_in_sz), GFP_KERNEL); + if (!ndp_in_sz) { + err = -ENOMEM; + goto size_err; + } + err = usb_control_msg(ctx->udev, usb_sndctrlpipe(ctx->udev, 0), USB_CDC_SET_NTB_INPUT_SIZE, USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE, - 0, iface_no, &ndp_in_sz, 8, 1000); + 0, iface_no, ndp_in_sz, 8, 1000); + kfree(ndp_in_sz); } else { - __le32 dwNtbInMaxSize = cpu_to_le32(ctx->rx_max); + __le32 *dwNtbInMaxSize; + dwNtbInMaxSize = kzalloc(sizeof(*dwNtbInMaxSize), + GFP_KERNEL); + if (!dwNtbInMaxSize) { + err = -ENOMEM; + goto size_err; + } + *dwNtbInMaxSize = cpu_to_le32(ctx->rx_max); + err = usb_control_msg(ctx->udev, usb_sndctrlpipe(ctx->udev, 0), USB_CDC_SET_NTB_INPUT_SIZE, USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE, - 0, iface_no, &dwNtbInMaxSize, 4, 1000); + 0, iface_no, dwNtbInMaxSize, 4, 1000); + kfree(dwNtbInMaxSize); } - +size_err: if (err < 0) pr_debug("Setting NTB Input Size failed\n"); } @@ -325,19 +342,29 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) /* set Max Datagram Size (MTU) */ if (flags & USB_CDC_NCM_NCAP_MAX_DATAGRAM_SIZE) { - __le16 max_datagram_size; + __le16 *max_datagram_size; u16 eth_max_sz = le16_to_cpu(ctx->ether_desc->wMaxSegmentSize); + + max_datagram_size = kzalloc(sizeof(*max_datagram_size), + GFP_KERNEL); + if (!max_datagram_size) { + err = -ENOMEM; + goto max_dgram_err; + } + err = usb_control_msg(ctx->udev, usb_rcvctrlpipe(ctx->udev, 0), USB_CDC_GET_MAX_DATAGRAM_SIZE, USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE, - 0, iface_no, &max_datagram_size, + 0, iface_no, max_datagram_size, 2, 1000); if (err < 0) { pr_debug("GET_MAX_DATAGRAM_SIZE failed, use size=%u\n", CDC_NCM_MIN_DATAGRAM_SIZE); + kfree(max_datagram_size); } else { - ctx->max_datagram_size = le16_to_cpu(max_datagram_size); + ctx->max_datagram_size = + le16_to_cpu(*max_datagram_size); /* Check Eth descriptor value */ if (eth_max_sz < CDC_NCM_MAX_DATAGRAM_SIZE) { if (ctx->max_datagram_size > eth_max_sz) @@ -360,8 +387,10 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE, 0, - iface_no, &max_datagram_size, + iface_no, max_datagram_size, 2, 1000); + kfree(max_datagram_size); +max_dgram_err: if (err < 0) pr_debug("SET_MAX_DATAGRAM_SIZE failed\n"); } -- cgit v1.1 From 951f2f960e5bbce20309de44626cf11d17847712 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 8 Aug 2011 06:28:50 +0000 Subject: drivers/net/can/sja1000/plx_pci.c: eliminate double free In this code, the failure_cleanup label calls the function plx_pci_del_card, which frees everything in the card->net_dev array. dev is placed in this array immediately after allocation, so the two subsequent jumps to failure_cleanup should not also call free_sja1000dev, but the second one does. If plx_pci_check_sja1000 fails, then free_sja1000dev is also called on dev. Because dev is already in the card->net_dev array, this implies that when plx_pci_del_card is later called, it may get freed again. So that entry is reset to NULL after the free. Finally, if there is a problem with one channel, there will be a hole in the array. card->channels counts the number of channels that have succeeded, and does not keep track of the index of the largest element in the array that is valid. So the loop in plx_pci_del_card is changed to go up to PLX_PCI_MAX_CHAN, which is only 2. Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/can/sja1000/plx_pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/sja1000/plx_pci.c b/drivers/net/can/sja1000/plx_pci.c index 231385b..c7f3d4e 100644 --- a/drivers/net/can/sja1000/plx_pci.c +++ b/drivers/net/can/sja1000/plx_pci.c @@ -408,7 +408,7 @@ static void plx_pci_del_card(struct pci_dev *pdev) struct sja1000_priv *priv; int i = 0; - for (i = 0; i < card->channels; i++) { + for (i = 0; i < PLX_PCI_MAX_CHAN; i++) { dev = card->net_dev[i]; if (!dev) continue; @@ -536,7 +536,6 @@ static int __devinit plx_pci_add_card(struct pci_dev *pdev, if (err) { dev_err(&pdev->dev, "Registering device failed " "(err=%d)\n", err); - free_sja1000dev(dev); goto failure_cleanup; } @@ -549,6 +548,7 @@ static int __devinit plx_pci_add_card(struct pci_dev *pdev, dev_err(&pdev->dev, "Channel #%d not detected\n", i + 1); free_sja1000dev(dev); + card->net_dev[i] = NULL; } } -- cgit v1.1 From 6a27cdeddf48858089e3672f844615cbf0877ebf Mon Sep 17 00:00:00 2001 From: Nobuhiro Iwamatsu Date: Tue, 9 Aug 2011 20:15:50 +0000 Subject: net: sh_eth: Fix build by forgot including linux/interrupt.h By a6b7a407865aab9f849dd99a71072b7cd1175116, remove interrupt.h from netdevice.h. But this forget to revise sh_eth. This fix the build failure. error: expected '=', ',', ';', 'asm' or '__attribute__' before 'sh_eth_interrupt' error: implicit declaration of function 'request_irq' error: 'sh_eth_interrupt' undeclared (first use in this function) error: (Each undeclared identifier is reported only once drivers/net/sh_eth.c:1386: error: for each function it appears in.) error: 'IRQF_SHARED' undeclared (first use in this function) error: implicit declaration of function 'free_irq' Signed-off-by: Nobuhiro Iwamatsu CC: Alexey Dobriyan Signed-off-by: David S. Miller --- drivers/net/sh_eth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/sh_eth.c b/drivers/net/sh_eth.c index ad35c21..190f619 100644 --- a/drivers/net/sh_eth.c +++ b/drivers/net/sh_eth.c @@ -21,6 +21,7 @@ */ #include +#include #include #include #include -- cgit v1.1 From 32f7fd44ce3aae9ad204fb167d793c335608568d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 10 Aug 2011 01:19:48 +0000 Subject: gianfar: prevent buggy hw rx vlan tagging On some buggy chips, "vlan tag present" flag is set which causes packet loss. Fix this by checking if rx vlan accel is enabled in features. Reported-by: Michael Guntsche Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/gianfar.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index 2659daa..31d5c57 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -2710,8 +2710,13 @@ static int gfar_process_frame(struct net_device *dev, struct sk_buff *skb, /* Tell the skb what kind of packet this is */ skb->protocol = eth_type_trans(skb, dev); - /* Set vlan tag */ - if (fcb->flags & RXFCB_VLN) + /* + * There's need to check for NETIF_F_HW_VLAN_RX here. + * Even if vlan rx accel is disabled, on some chips + * RXFCB_VLN is pseudo randomly set. + */ + if (dev->features & NETIF_F_HW_VLAN_RX && + fcb->flags & RXFCB_VLN) __vlan_hwaccel_put_tag(skb, fcb->vlctl); /* Send the packet up the stack */ -- cgit v1.1 From b88cf73d9278a5838e3ac2b670ab3b4ff533ea17 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 11 Aug 2011 14:39:59 +0000 Subject: net: add missing entries to Documentation/networking/00-INDEX A simple janitor duty patch that adds a one sentence overview to 00-INDEX for all files that lacked it. - does not add entries for subdirectories - does not modify existing entries. Signed-off-by: David S. Miller --- Documentation/networking/00-INDEX | 116 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index 4edd78d..811252b 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -1,13 +1,21 @@ 00-INDEX - this file +3c359.txt + - information on the 3Com TokenLink Velocity XL (3c5359) driver. 3c505.txt - information on the 3Com EtherLink Plus (3c505) driver. +3c509.txt + - information on the 3Com Etherlink III Series Ethernet cards. 6pack.txt - info on the 6pack protocol, an alternative to KISS for AX.25 DLINK.txt - info on the D-Link DE-600/DE-620 parallel port pocket adapters PLIP.txt - PLIP: The Parallel Line Internet Protocol device driver +README.ipw2100 + - README for the Intel PRO/Wireless 2100 driver. +README.ipw2200 + - README for the Intel PRO/Wireless 2915ABG and 2200BG driver. README.sb1000 - info on General Instrument/NextLevel SURFboard1000 cable modem. alias.txt @@ -20,8 +28,12 @@ atm.txt - info on where to get ATM programs and support for Linux. ax25.txt - info on using AX.25 and NET/ROM code for Linux +batman-adv.txt + - B.A.T.M.A.N routing protocol on top of layer 2 Ethernet Frames. baycom.txt - info on the driver for Baycom style amateur radio modems +bonding.txt + - Linux Ethernet Bonding Driver HOWTO: link aggregation in Linux. bridge.txt - where to get user space programs for ethernet bridging with Linux. can.txt @@ -34,32 +46,60 @@ cxacru.txt - Conexant AccessRunner USB ADSL Modem cxacru-cf.py - Conexant AccessRunner USB ADSL Modem configuration file parser +cxgb.txt + - Release Notes for the Chelsio N210 Linux device driver. +dccp.txt + - the Datagram Congestion Control Protocol (DCCP) (RFC 4340..42). de4x5.txt - the Digital EtherWORKS DE4?? and DE5?? PCI Ethernet driver decnet.txt - info on using the DECnet networking layer in Linux. depca.txt - the Digital DEPCA/EtherWORKS DE1?? and DE2?? LANCE Ethernet driver +dl2k.txt + - README for D-Link DL2000-based Gigabit Ethernet Adapters (dl2k.ko). +dm9000.txt + - README for the Simtec DM9000 Network driver. dmfe.txt - info on the Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver. +dns_resolver.txt + - The DNS resolver module allows kernel servies to make DNS queries. +driver.txt + - Softnet driver issues. e100.txt - info on Intel's EtherExpress PRO/100 line of 10/100 boards e1000.txt - info on Intel's E1000 line of gigabit ethernet boards +e1000e.txt + - README for the Intel Gigabit Ethernet Driver (e1000e). eql.txt - serial IP load balancing ewrk3.txt - the Digital EtherWORKS 3 DE203/4/5 Ethernet driver +fib_trie.txt + - Level Compressed Trie (LC-trie) notes: a structure for routing. filter.txt - Linux Socket Filtering fore200e.txt - FORE Systems PCA-200E/SBA-200E ATM NIC driver info. framerelay.txt - info on using Frame Relay/Data Link Connection Identifier (DLCI). +gen_stats.txt + - Generic networking statistics for netlink users. +generic_hdlc.txt + - The generic High Level Data Link Control (HDLC) layer. generic_netlink.txt - info on Generic Netlink +gianfar.txt + - Gianfar Ethernet Driver. ieee802154.txt - Linux IEEE 802.15.4 implementation, API and drivers +ifenslave.c + - Configure network interfaces for parallel routing (bonding). +igb.txt + - README for the Intel Gigabit Ethernet Driver (igb). +igbvf.txt + - README for the Intel Gigabit Ethernet Driver (igbvf). ip-sysctl.txt - /proc/sys/net/ipv4/* variables ip_dynaddr.txt @@ -68,41 +108,117 @@ ipddp.txt - AppleTalk-IP Decapsulation and AppleTalk-IP Encapsulation iphase.txt - Interphase PCI ATM (i)Chip IA Linux driver info. +ipv6.txt + - Options to the ipv6 kernel module. +ipvs-sysctl.txt + - Per-inode explanation of the /proc/sys/net/ipv4/vs interface. irda.txt - where to get IrDA (infrared) utilities and info for Linux. +ixgb.txt + - README for the Intel 10 Gigabit Ethernet Driver (ixgb). +ixgbe.txt + - README for the Intel 10 Gigabit Ethernet Driver (ixgbe). +ixgbevf.txt + - README for the Intel Virtual Function (VF) Driver (ixgbevf). +l2tp.txt + - User guide to the L2TP tunnel protocol. lapb-module.txt - programming information of the LAPB module. ltpc.txt - the Apple or Farallon LocalTalk PC card driver +mac80211-injection.txt + - HOWTO use packet injection with mac80211 multicast.txt - Behaviour of cards under Multicast +multiqueue.txt + - HOWTO for multiqueue network device support. +netconsole.txt + - The network console module netconsole.ko: configuration and notes. +netdev-features.txt + - Network interface "feature mess and how to get out from it alive". netdevices.txt - info on network device driver functions exported to the kernel. +netif-msg.txt + - Design of the network interface message level setting (NETIF_MSG_*). +nfc.txt + - The Linux Near Field Communication (NFS) subsystem. olympic.txt - IBM PCI Pit/Pit-Phy/Olympic Token Ring driver info. +operstates.txt + - Overview of network interface operational states. +packet_mmap.txt + - User guide to memory mapped packet socket rings (PACKET_[RT]X_RING). +phonet.txt + - The Phonet packet protocol used in Nokia cellular modems. +phy.txt + - The PHY abstraction layer. +pktgen.txt + - User guide to the kernel packet generator (pktgen.ko). policy-routing.txt - IP policy-based routing +ppp_generic.txt + - Information about the generic PPP driver. +proc_net_tcp.txt + - Per inode overview of the /proc/net/tcp and /proc/net/tcp6 interfaces. +radiotap-headers.txt + - Background on radiotap headers. ray_cs.txt - Raylink Wireless LAN card driver info. +rds.txt + - Background on the reliable, ordered datagram delivery method RDS. +regulatory.txt + - Overview of the Linux wireless regulatory infrastructure. +rxrpc.txt + - Guide to the RxRPC protocol. +s2io.txt + - Release notes for Neterion Xframe I/II 10GbE driver. +scaling.txt + - Explanation of network scaling techniques: RSS, RPS, RFS, aRFS, XPS. +sctp.txt + - Notes on the Linux kernel implementation of the SCTP protocol. +secid.txt + - Explanation of the secid member in flow structures. skfp.txt - SysKonnect FDDI (SK-5xxx, Compaq Netelligent) driver info. smc9.txt - the driver for SMC's 9000 series of Ethernet cards smctr.txt - SMC TokenCard TokenRing Linux driver info. +spider-net.txt + - README for the Spidernet Driver (as found in PS3 / Cell BE). +stmmac.txt + - README for the STMicro Synopsys Ethernet driver. +tc-actions-env-rules.txt + - rules for traffic control (tc) actions. +timestamping.txt + - overview of network packet timestamping variants. tcp.txt - short blurb on how TCP output takes place. +tcp-thin.txt + - kernel tuning options for low rate 'thin' TCP streams. tlan.txt - ThunderLAN (Compaq Netelligent 10/100, Olicom OC-2xxx) driver info. tms380tr.txt - SysKonnect Token Ring ISA/PCI adapter driver info. +tproxy.txt + - Transparent proxy support user guide. tuntap.txt - TUN/TAP device driver, allowing user space Rx/Tx of packets. +udplite.txt + - UDP-Lite protocol (RFC 3828) introduction. vortex.txt - info on using 3Com Vortex (3c590, 3c592, 3c595, 3c597) Ethernet cards. +vxge.txt + - README for the Neterion X3100 PCIe Server Adapter. x25.txt - general info on X.25 development. x25-iface.txt - description of the X.25 Packet Layer to LAPB device interface. +xfrm_proc.txt + - description of the statistics package for XFRM. +xfrm_sync.txt + - sync patches for XFRM enable migration of an SA between hosts. +xfrm_sysctl.txt + - description of the XFRM configuration options. z8530drv.txt - info about Linux driver for Z8530 based HDLC cards for AX.25 -- cgit v1.1 From 320f24e482e6b390c608c6afec253405f9ab7436 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 11 Aug 2011 14:41:48 +0000 Subject: net: minor update to Documentation/networking/scaling.txt Incorporate last comments about hyperthreading, interrupt coalescing and the definition of cache domains into the network scaling document scaling.txt Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- Documentation/networking/scaling.txt | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt index 7254b4b..58fd741 100644 --- a/Documentation/networking/scaling.txt +++ b/Documentation/networking/scaling.txt @@ -52,7 +52,8 @@ module parameter for specifying the number of hardware queues to configure. In the bnx2x driver, for instance, this parameter is called num_queues. A typical RSS configuration would be to have one receive queue for each CPU if the device supports enough queues, or otherwise at least -one for each cache domain at a particular cache level (L1, L2, etc.). +one for each memory domain, where a memory domain is a set of CPUs that +share a particular memory level (L1, L2, NUMA node, etc.). The indirection table of an RSS device, which resolves a queue by masked hash, is usually programmed by the driver at initialization. The @@ -82,11 +83,17 @@ RSS should be enabled when latency is a concern or whenever receive interrupt processing forms a bottleneck. Spreading load between CPUs decreases queue length. For low latency networking, the optimal setting is to allocate as many queues as there are CPUs in the system (or the -NIC maximum, if lower). Because the aggregate number of interrupts grows -with each additional queue, the most efficient high-rate configuration +NIC maximum, if lower). The most efficient high-rate configuration is likely the one with the smallest number of receive queues where no -CPU that processes receive interrupts reaches 100% utilization. Per-cpu -load can be observed using the mpstat utility. +receive queue overflows due to a saturated CPU, because in default +mode with interrupt coalescing enabled, the aggregate number of +interrupts (and thus work) grows with each additional queue. + +Per-cpu load can be observed using the mpstat utility, but note that on +processors with hyperthreading (HT), each hyperthread is represented as +a separate CPU. For interrupt handling, HT has shown no benefit in +initial tests, so limit the number of queues to the number of CPU cores +in the system. RPS: Receive Packet Steering @@ -145,7 +152,7 @@ the bitmap. == Suggested Configuration For a single queue device, a typical RPS configuration would be to set -the rps_cpus to the CPUs in the same cache domain of the interrupting +the rps_cpus to the CPUs in the same memory domain of the interrupting CPU. If NUMA locality is not an issue, this could also be all CPUs in the system. At high interrupt rate, it might be wise to exclude the interrupting CPU from the map since that already performs much work. @@ -154,7 +161,7 @@ For a multi-queue system, if RSS is configured so that a hardware receive queue is mapped to each CPU, then RPS is probably redundant and unnecessary. If there are fewer hardware queues than CPUs, then RPS might be beneficial if the rps_cpus for each queue are the ones that -share the same cache domain as the interrupting CPU for that queue. +share the same memory domain as the interrupting CPU for that queue. RFS: Receive Flow Steering @@ -326,7 +333,7 @@ The queue chosen for transmitting a particular flow is saved in the corresponding socket structure for the flow (e.g. a TCP connection). This transmit queue is used for subsequent packets sent on the flow to prevent out of order (ooo) packets. The choice also amortizes the cost -of calling get_xps_queues() over all packets in the connection. To avoid +of calling get_xps_queues() over all packets in the flow. To avoid ooo packets, the queue for a flow can subsequently only be changed if skb->ooo_okay is set for a packet in the flow. This flag indicates that there are no outstanding packets in the flow, so the transmit queue can -- cgit v1.1 From 588dc91151d99e9307c2f9a8468453274fe43ecd Mon Sep 17 00:00:00 2001 From: Wang Shaoyan Date: Thu, 11 Aug 2011 17:07:25 +0000 Subject: gianfar: reduce stack usage in gianfar_ethtool.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/net/gianfar_ethtool.c:765: warning: the frame size of 2048 bytes is larger than 1024 bytes Signed-off-by: Wang Shaoyan Reviewed-and-tested-by: Sebastian Pöhn Signed-off-by: David S. Miller --- drivers/net/gianfar_ethtool.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/net/gianfar_ethtool.c b/drivers/net/gianfar_ethtool.c index 6e35069..25a8c2a 100644 --- a/drivers/net/gianfar_ethtool.c +++ b/drivers/net/gianfar_ethtool.c @@ -686,10 +686,21 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u { unsigned int last_rule_idx = priv->cur_filer_idx; unsigned int cmp_rqfpr; - unsigned int local_rqfpr[MAX_FILER_IDX + 1]; - unsigned int local_rqfcr[MAX_FILER_IDX + 1]; + unsigned int *local_rqfpr; + unsigned int *local_rqfcr; int i = 0x0, k = 0x0; int j = MAX_FILER_IDX, l = 0x0; + int ret = 1; + + local_rqfpr = kmalloc(sizeof(unsigned int) * (MAX_FILER_IDX + 1), + GFP_KERNEL); + local_rqfcr = kmalloc(sizeof(unsigned int) * (MAX_FILER_IDX + 1), + GFP_KERNEL); + if (!local_rqfpr || !local_rqfcr) { + pr_err("Out of memory\n"); + ret = 0; + goto err; + } switch (class) { case TCP_V4_FLOW: @@ -706,7 +717,8 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u break; default: pr_err("Right now this class is not supported\n"); - return 0; + ret = 0; + goto err; } for (i = 0; i < MAX_FILER_IDX + 1; i++) { @@ -721,7 +733,8 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u if (i == MAX_FILER_IDX + 1) { pr_err("No parse rule found, can't create hash rules\n"); - return 0; + ret = 0; + goto err; } /* If a match was found, then it begins the starting of a cluster rule @@ -765,7 +778,10 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u priv->cur_filer_idx = priv->cur_filer_idx - 1; } - return 1; +err: + kfree(local_rqfcr); + kfree(local_rqfpr); + return ret; } static int gfar_set_hash_opts(struct gfar_private *priv, struct ethtool_rxnfc *cmd) -- cgit v1.1 From a115c72802c37351b6d87dfb62938d2ad440eef4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 4 Aug 2011 13:23:38 +0900 Subject: ASoC: Move WM8962 CLKREG_OVD earlier When the clocking registers are not overriden some of the registers are not writable. Signed-off-by: Mark Brown Acked-by: Liam Girdwood Cc: stable@kernel.org --- sound/soc/codecs/wm8962.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 60d740e..28650ed 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2927,10 +2927,6 @@ static int wm8962_set_bias_level(struct snd_soc_codec *codec, WM8962_BIAS_ENA | 0x180); msleep(5); - - snd_soc_update_bits(codec, WM8962_CLOCKING2, - WM8962_CLKREG_OVD, - WM8962_CLKREG_OVD); } /* VMID 2*250k */ @@ -3868,6 +3864,10 @@ static int wm8962_probe(struct snd_soc_codec *codec) */ snd_soc_update_bits(codec, WM8962_CLOCKING2, WM8962_SYSCLK_ENA, 0); + /* Ensure we have soft control over all registers */ + snd_soc_update_bits(codec, WM8962_CLOCKING2, + WM8962_CLKREG_OVD, WM8962_CLKREG_OVD); + regulator_bulk_disable(ARRAY_SIZE(wm8962->supplies), wm8962->supplies); if (pdata) { -- cgit v1.1 From fd049755636a8b2cc084e088967dd566467ccebc Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Fri, 12 Aug 2011 17:52:59 +0300 Subject: ASoC: h1940: Fix compilation error due to missing header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add linux/types.h to fix this compilation error: In file included from arch/arm/mach-s3c2410/include/mach/gpio-fns.h:27:0, from arch/arm/mach-s3c2410/include/mach/gpio.h:27, from /home/anarsoul/work/pda-linux/linux-next/arch/arm/include/asm/gpio.h:5, from include/linux/gpio.h:18, from sound/soc/samsung/rx1950_uda1380.c:20: arch/arm/plat-samsung/include/plat/gpio-cfg.h:29:34: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s3c_gpio_pull_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:30:34: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s5p_gpio_drvstr_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:57:2: error: expected specifier-qualifier-list before ‘s3c_gpio_pull_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:148:47: error: expected declaration specifiers or ‘...’ before ‘s3c_gpio_pull_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:156:24: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s3c_gpio_getpull’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:175:24: error: expected declaration specifiers or ‘...’ before ‘s3c_gpio_pull_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h: In function ‘s3c_gpio_cfgrange_nopull’: arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: error: ‘s3c_gpio_pull_t’ undeclared (first use in this function) arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: note: each undeclared identifier is reported only once for each function it appears in arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: error: expected ‘)’ before numeric constant arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: error: too many arguments to function ‘s3c_gpio_cfgall_range’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:174:12: note: declared here arch/arm/plat-samsung/include/plat/gpio-cfg.h: At top level: arch/arm/plat-samsung/include/plat/gpio-cfg.h:199:26: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s5p_gpio_get_drvstr’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:210:50: error: expected declaration specifiers or ‘...’ before ‘s5p_gpio_drvstr_t’ Signed-off-by: Vasily Khoruzhick Acked-by: Jassi Brar Signed-off-by: Mark Brown --- sound/soc/samsung/h1940_uda1380.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/samsung/h1940_uda1380.c b/sound/soc/samsung/h1940_uda1380.c index 241f55d..c6c6589 100644 --- a/sound/soc/samsung/h1940_uda1380.c +++ b/sound/soc/samsung/h1940_uda1380.c @@ -13,6 +13,7 @@ * */ +#include #include #include -- cgit v1.1 From b8487928f5ca2976e4cb8d329943af849d2b6197 Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Fri, 12 Aug 2011 17:53:00 +0300 Subject: ASoC: rx1950: Fix compilation error due to missing header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add linux/types.h to fix this compilation error: In file included from arch/arm/mach-s3c2410/include/mach/gpio-fns.h:27:0, from arch/arm/mach-s3c2410/include/mach/gpio.h:27, from /home/anarsoul/work/pda-linux/linux-next/arch/arm/include/asm/gpio.h:5, from include/linux/gpio.h:18, from sound/soc/samsung/rx1950_uda1380.c:20: arch/arm/plat-samsung/include/plat/gpio-cfg.h:29:34: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s3c_gpio_pull_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:30:34: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s5p_gpio_drvstr_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:57:2: error: expected specifier-qualifier-list before ‘s3c_gpio_pull_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:148:47: error: expected declaration specifiers or ‘...’ before ‘s3c_gpio_pull_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:156:24: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s3c_gpio_getpull’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:175:24: error: expected declaration specifiers or ‘...’ before ‘s3c_gpio_pull_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h: In function ‘s3c_gpio_cfgrange_nopull’: arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: error: ‘s3c_gpio_pull_t’ undeclared (first use in this function) arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: note: each undeclared identifier is reported only once for each function it appears in arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: error: expected ‘)’ before numeric constant arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: error: too many arguments to function ‘s3c_gpio_cfgall_range’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:174:12: note: declared here arch/arm/plat-samsung/include/plat/gpio-cfg.h: At top level: arch/arm/plat-samsung/include/plat/gpio-cfg.h:199:26: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s5p_gpio_get_drvstr’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:210:50: error: expected declaration specifiers or ‘...’ before ‘s5p_gpio_drvstr_t’ Signed-off-by: Vasily Khoruzhick Acked-by: Jassi Brar Signed-off-by: Mark Brown --- sound/soc/samsung/rx1950_uda1380.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/samsung/rx1950_uda1380.c b/sound/soc/samsung/rx1950_uda1380.c index 1e574a5..bc8c167 100644 --- a/sound/soc/samsung/rx1950_uda1380.c +++ b/sound/soc/samsung/rx1950_uda1380.c @@ -17,6 +17,7 @@ * */ +#include #include #include -- cgit v1.1 From 17f2ae7f677f023997e02fd2ebabd90ea2a0390d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 14 Aug 2011 13:34:31 +0200 Subject: PM / Domains: Fix build for CONFIG_PM_RUNTIME unset Function genpd_queue_power_off_work() is not defined for CONFIG_PM_RUNTIME, so pm_genpd_poweroff_unused() causes a build error to happen in that case. Fix the problem by making pm_genpd_poweroff_unused() depend on CONFIG_PM_RUNTIME too. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 30 +++++++++++++++--------------- include/linux/pm_domain.h | 10 +++++++--- kernel/power/Kconfig | 4 ++++ 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index e18566a..1c37457 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -460,6 +460,21 @@ static int pm_genpd_runtime_resume(struct device *dev) return 0; } +/** + * pm_genpd_poweroff_unused - Power off all PM domains with no devices in use. + */ +void pm_genpd_poweroff_unused(void) +{ + struct generic_pm_domain *genpd; + + mutex_lock(&gpd_list_lock); + + list_for_each_entry(genpd, &gpd_list, gpd_list_node) + genpd_queue_power_off_work(genpd); + + mutex_unlock(&gpd_list_lock); +} + #else static inline void genpd_power_off_work_fn(struct work_struct *work) {} @@ -1255,18 +1270,3 @@ void pm_genpd_init(struct generic_pm_domain *genpd, list_add(&genpd->gpd_list_node, &gpd_list); mutex_unlock(&gpd_list_lock); } - -/** - * pm_genpd_poweroff_unused - Power off all PM domains with no devices in use. - */ -void pm_genpd_poweroff_unused(void) -{ - struct generic_pm_domain *genpd; - - mutex_lock(&gpd_list_lock); - - list_for_each_entry(genpd, &gpd_list, gpd_list_node) - genpd_queue_power_off_work(genpd); - - mutex_unlock(&gpd_list_lock); -} diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 21097cb..f9ec173 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -72,8 +72,6 @@ extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, extern void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); extern int pm_genpd_poweron(struct generic_pm_domain *genpd); -extern void pm_genpd_poweroff_unused(void); -extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd); #else static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) @@ -101,8 +99,14 @@ static inline int pm_genpd_poweron(struct generic_pm_domain *genpd) { return -ENOSYS; } -static inline void pm_genpd_poweroff_unused(void) {} +#endif + +#ifdef CONFIG_PM_GENERIC_DOMAINS_RUNTIME +extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd); +extern void pm_genpd_poweroff_unused(void); +#else static inline void genpd_queue_power_off_work(struct generic_pm_domain *gpd) {} +static inline void pm_genpd_poweroff_unused(void) {} #endif #endif /* _LINUX_PM_DOMAIN_H */ diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index b1914cb9..3744c59 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -231,3 +231,7 @@ config PM_CLK config PM_GENERIC_DOMAINS bool depends on PM + +config PM_GENERIC_DOMAINS_RUNTIME + def_bool y + depends on PM_RUNTIME && PM_GENERIC_DOMAINS -- cgit v1.1 From da6094ea7d3c2295473d8f5134279307255d6ebf Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Sun, 14 Aug 2011 11:31:16 +0200 Subject: ALSA: snd_usb_caiaq: track submitted output urbs The snd_usb_caiaq driver currently assumes that output urbs are serviced in time and doesn't track when and whether they are given back by the USB core. That usually works fine, but due to temporary limitations of the XHCI stack, we faced that urbs were submitted more than once with this approach. As it's no good practice to fire and forget urbs anyway, this patch introduces a proper bit mask to track which requests have been submitted and given back. That alone however doesn't make the driver work in case the host controller is broken and doesn't give back urbs at all, and the output stream will stop once all pre-allocated output urbs are consumed. But it does prevent crashes of the controller stack in such cases. See http://bugzilla.kernel.org/show_bug.cgi?id=40702 for more details. Signed-off-by: Daniel Mack Reported-and-tested-by: Matej Laitl Cc: Sarah Sharp Cc: stable@kernel.org Signed-off-by: Takashi Iwai --- sound/usb/caiaq/audio.c | 31 +++++++++++++++++++++++++++---- sound/usb/caiaq/device.h | 1 + 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/sound/usb/caiaq/audio.c b/sound/usb/caiaq/audio.c index aa52b3e..2cf87f5 100644 --- a/sound/usb/caiaq/audio.c +++ b/sound/usb/caiaq/audio.c @@ -139,8 +139,12 @@ static void stream_stop(struct snd_usb_caiaqdev *dev) for (i = 0; i < N_URBS; i++) { usb_kill_urb(dev->data_urbs_in[i]); - usb_kill_urb(dev->data_urbs_out[i]); + + if (test_bit(i, &dev->outurb_active_mask)) + usb_kill_urb(dev->data_urbs_out[i]); } + + dev->outurb_active_mask = 0; } static int snd_usb_caiaq_substream_open(struct snd_pcm_substream *substream) @@ -612,8 +616,8 @@ static void read_completed(struct urb *urb) { struct snd_usb_caiaq_cb_info *info = urb->context; struct snd_usb_caiaqdev *dev; - struct urb *out; - int frame, len, send_it = 0, outframe = 0; + struct urb *out = NULL; + int i, frame, len, send_it = 0, outframe = 0; size_t offset = 0; if (urb->status || !info) @@ -624,7 +628,17 @@ static void read_completed(struct urb *urb) if (!dev->streaming) return; - out = dev->data_urbs_out[info->index]; + /* find an unused output urb that is unused */ + for (i = 0; i < N_URBS; i++) + if (test_and_set_bit(i, &dev->outurb_active_mask) == 0) { + out = dev->data_urbs_out[i]; + break; + } + + if (!out) { + log("Unable to find an output urb to use\n"); + goto requeue; + } /* read the recently received packet and send back one which has * the same layout */ @@ -655,8 +669,12 @@ static void read_completed(struct urb *urb) out->number_of_packets = outframe; out->transfer_flags = URB_ISO_ASAP; usb_submit_urb(out, GFP_ATOMIC); + } else { + struct snd_usb_caiaq_cb_info *oinfo = out->context; + clear_bit(oinfo->index, &dev->outurb_active_mask); } +requeue: /* re-submit inbound urb */ for (frame = 0; frame < FRAMES_PER_URB; frame++) { urb->iso_frame_desc[frame].offset = BYTES_PER_FRAME * frame; @@ -678,6 +696,8 @@ static void write_completed(struct urb *urb) dev->output_running = 1; wake_up(&dev->prepare_wait_queue); } + + clear_bit(info->index, &dev->outurb_active_mask); } static struct urb **alloc_urbs(struct snd_usb_caiaqdev *dev, int dir, int *ret) @@ -829,6 +849,9 @@ int snd_usb_caiaq_audio_init(struct snd_usb_caiaqdev *dev) if (!dev->data_cb_info) return -ENOMEM; + dev->outurb_active_mask = 0; + BUILD_BUG_ON(N_URBS > (sizeof(dev->outurb_active_mask) * 8)); + for (i = 0; i < N_URBS; i++) { dev->data_cb_info[i].dev = dev; dev->data_cb_info[i].index = i; diff --git a/sound/usb/caiaq/device.h b/sound/usb/caiaq/device.h index b2b3101..3f9c633 100644 --- a/sound/usb/caiaq/device.h +++ b/sound/usb/caiaq/device.h @@ -96,6 +96,7 @@ struct snd_usb_caiaqdev { int input_panic, output_panic, warned; char *audio_in_buf, *audio_out_buf; unsigned int samplerates, bpp; + unsigned long outurb_active_mask; struct snd_pcm_substream *sub_playback[MAX_STREAMS]; struct snd_pcm_substream *sub_capture[MAX_STREAMS]; -- cgit v1.1 From eade7b281c9fc18401b989c77d5e5e660b25a3b7 Mon Sep 17 00:00:00 2001 From: Daniel T Chen Date: Sun, 14 Aug 2011 22:43:01 -0400 Subject: ALSA: ac97: Add HP Compaq dc5100 SFF(PT003AW) to Headphone Jack Sense whitelist BugLink: https://bugs.launchpad.net/bugs/826081 The original reporter needs 'Headphone Jack Sense' enabled to have audible audio, so add his PCI SSID to the whitelist. Reported-and-tested-by: Muhammad Khurram Khan Cc: Signed-off-by: Daniel T Chen Signed-off-by: Takashi Iwai --- sound/pci/ac97/ac97_patch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index 200c9a1..a872d0a 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -1909,6 +1909,7 @@ static unsigned int ad1981_jacks_whitelist[] = { 0x103c0944, /* HP nc6220 */ 0x103c0934, /* HP nc8220 */ 0x103c006d, /* HP nx9105 */ + 0x103c300d, /* HP Compaq dc5100 SFF(PT003AW) */ 0x17340088, /* FSC Scenic-W */ 0 /* end */ }; -- cgit v1.1 From 2a004c686e7997ddb795dbce10b263e241f9bdaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Fri, 17 Jun 2011 11:09:07 +0100 Subject: ARM: 6965/1: ep93xx: add model detection for ts-7300 and ts-7400 boards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Ryan Mallon Acked-by: H Hartley Sweeten Signed-off-by: Petr Štetiar Signed-off-by: Russell King --- arch/arm/mach-ep93xx/include/mach/ts72xx.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm/mach-ep93xx/include/mach/ts72xx.h b/arch/arm/mach-ep93xx/include/mach/ts72xx.h index 0eabec6..ee7f875 100644 --- a/arch/arm/mach-ep93xx/include/mach/ts72xx.h +++ b/arch/arm/mach-ep93xx/include/mach/ts72xx.h @@ -20,6 +20,8 @@ #define TS72XX_MODEL_TS7200 0x00 #define TS72XX_MODEL_TS7250 0x01 #define TS72XX_MODEL_TS7260 0x02 +#define TS72XX_MODEL_TS7300 0x03 +#define TS72XX_MODEL_TS7400 0x04 #define TS72XX_OPTIONS_PHYS_BASE 0x22400000 @@ -66,6 +68,16 @@ static inline int board_is_ts7260(void) return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7260; } +static inline int board_is_ts7300(void) +{ + return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7300; +} + +static inline int board_is_ts7400(void) +{ + return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7400; +} + static inline int is_max197_installed(void) { return !!(__raw_readb(TS72XX_OPTIONS_VIRT_BASE) & -- cgit v1.1 From 505ed6fd82608bd4f26d487220ec40a3c5d0dded Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Fri, 17 Jun 2011 11:11:59 +0100 Subject: ARM: 6967/1: ep93xx: ts72xx: fix board model detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the obvious error in board detection logic, because according to the TS's manual, the model is stored in the least three significant bits. For example the byte read on my ts-7300 is 0x23 and the detection then fails. Cc: Ryan Mallon Acked-by: H Hartley Sweeten Signed-off-by: Petr Štetiar Signed-off-by: Russell King --- arch/arm/mach-ep93xx/include/mach/ts72xx.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-ep93xx/include/mach/ts72xx.h b/arch/arm/mach-ep93xx/include/mach/ts72xx.h index ee7f875..f1397a1 100644 --- a/arch/arm/mach-ep93xx/include/mach/ts72xx.h +++ b/arch/arm/mach-ep93xx/include/mach/ts72xx.h @@ -6,7 +6,7 @@ * TS72xx memory map: * * virt phys size - * febff000 22000000 4K model number register + * febff000 22000000 4K model number register (bits 0-2) * febfe000 22400000 4K options register * febfd000 22800000 4K options register #2 * febf9000 10800000 4K TS-5620 RTC index register @@ -22,6 +22,7 @@ #define TS72XX_MODEL_TS7260 0x02 #define TS72XX_MODEL_TS7300 0x03 #define TS72XX_MODEL_TS7400 0x04 +#define TS72XX_MODEL_MASK 0x07 #define TS72XX_OPTIONS_PHYS_BASE 0x22400000 @@ -53,29 +54,34 @@ #ifndef __ASSEMBLY__ +static inline int ts72xx_model(void) +{ + return __raw_readb(TS72XX_MODEL_VIRT_BASE) & TS72XX_MODEL_MASK; +} + static inline int board_is_ts7200(void) { - return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7200; + return ts72xx_model() == TS72XX_MODEL_TS7200; } static inline int board_is_ts7250(void) { - return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7250; + return ts72xx_model() == TS72XX_MODEL_TS7250; } static inline int board_is_ts7260(void) { - return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7260; + return ts72xx_model() == TS72XX_MODEL_TS7260; } static inline int board_is_ts7300(void) { - return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7300; + return ts72xx_model() == TS72XX_MODEL_TS7300; } static inline int board_is_ts7400(void) { - return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7400; + return ts72xx_model() == TS72XX_MODEL_TS7400; } static inline int is_max197_installed(void) -- cgit v1.1 From 43c734be5571a4daad9f0a3e0b3229a1c0049917 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 15 Aug 2011 10:43:44 +0100 Subject: ARM: 7014/1: cache-l2x0: Fix L2 Cache size calculation. This patch fixes L2 Cache size calculations for L2C-210, L2C-310 and PL310, by changing the L2X0_AUX_CTRL_WAY_SIZE_MASK from 2 bits to 3 bits. The Auxiliary Control Register for L2C-210, L2C-310 and PL310 has 3bits [19:17] for Way size, however the existing code only uses 2 bits to get this value. This results in incorrect cachesize calculations. It also results in performing operations on the whole cache when we erroneously decide that the range is big enough (due to l2x0_size being too small) and also prints incorrect cachesize. Signed-off-by: Srinivas Kandagatla Acked-by: Will Deacon Cc: stable@kernel.org Signed-off-by: Russell King --- arch/arm/include/asm/hardware/cache-l2x0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h index 16bd480..bfa706f 100644 --- a/arch/arm/include/asm/hardware/cache-l2x0.h +++ b/arch/arm/include/asm/hardware/cache-l2x0.h @@ -64,7 +64,7 @@ #define L2X0_AUX_CTRL_MASK 0xc0000fff #define L2X0_AUX_CTRL_ASSOCIATIVITY_SHIFT 16 #define L2X0_AUX_CTRL_WAY_SIZE_SHIFT 17 -#define L2X0_AUX_CTRL_WAY_SIZE_MASK (0x3 << 17) +#define L2X0_AUX_CTRL_WAY_SIZE_MASK (0x7 << 17) #define L2X0_AUX_CTRL_SHARE_OVERRIDE_SHIFT 22 #define L2X0_AUX_CTRL_NS_LOCKDOWN_SHIFT 26 #define L2X0_AUX_CTRL_NS_INT_CTRL_SHIFT 27 -- cgit v1.1 From 145e10e173c8adf4804334fb0dd10028300a7a7a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 15 Aug 2011 11:04:41 +0100 Subject: ARM: 7015/1: ARM errata: Possible cache data corruption with hit-under-miss enabled This patch is a workaround for the 364296 ARM1136 r0p2 erratum (possible cache data corruption with hit-under-miss enabled). It sets the undocumented bit 31 in the auxiliary control register and the FI bit in the control register, thus disabling hit-under-miss without putting the processor into full low interrupt latency mode. Signed-off-by: Catalin Marinas Tested-by: Siarhei Siamashka Signed-off-by: Russell King --- arch/arm/Kconfig | 12 ++++++++++++ arch/arm/mm/proc-v6.S | 16 ++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 5ebc5d9..3269576 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1271,6 +1271,18 @@ config ARM_ERRATA_754327 This workaround defines cpu_relax() as smp_mb(), preventing correctly written polling loops from denying visibility of updates to memory. +config ARM_ERRATA_364296 + bool "ARM errata: Possible cache data corruption with hit-under-miss enabled" + depends on CPU_V6 && !SMP + help + This options enables the workaround for the 364296 ARM1136 + r0p2 erratum (possible cache data corruption with + hit-under-miss enabled). It sets the undocumented bit 31 in + the auxiliary control register and the FI bit in the control + register, thus disabling hit-under-miss without putting the + processor into full low interrupt latency mode. ARM11MPCore + is not affected. + endmenu source "arch/arm/common/Kconfig" diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 219138d..a923aa0 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -223,6 +223,22 @@ __v6_setup: mrc p15, 0, r0, c1, c0, 0 @ read control register bic r0, r0, r5 @ clear bits them orr r0, r0, r6 @ set them +#ifdef CONFIG_ARM_ERRATA_364296 + /* + * Workaround for the 364296 ARM1136 r0p2 erratum (possible cache data + * corruption with hit-under-miss enabled). The conditional code below + * (setting the undocumented bit 31 in the auxiliary control register + * and the FI bit in the control register) disables hit-under-miss + * without putting the processor into full low interrupt latency mode. + */ + ldr r6, =0x4107b362 @ id for ARM1136 r0p2 + mrc p15, 0, r5, c0, c0, 0 @ get processor id + teq r5, r6 @ check for the faulty core + mrceq p15, 0, r5, c1, c0, 1 @ load aux control reg + orreq r5, r5, #(1 << 31) @ set the undocumented bit 31 + mcreq p15, 0, r5, c1, c0, 1 @ write aux control reg + orreq r0, r0, #(1 << 21) @ low interrupt latency configuration +#endif mov pc, lr @ return to head.S:__ret /* -- cgit v1.1 From d2b4c7bd7eabfaa2e3e5b8107d5eeb56ac879813 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sat, 13 Aug 2011 19:15:01 +0800 Subject: ASoC: soc-jack: Fix checking return value of request_any_context_irq request_any_context_irq() returns a negative value on failure. On success, it returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED. Signed-off-by: Axel Lin Signed-off-by: Mark Brown Cc: stable@kernel.orG --- sound/soc/soc-jack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-jack.c b/sound/soc/soc-jack.c index 7c17b98..38b0013 100644 --- a/sound/soc/soc-jack.c +++ b/sound/soc/soc-jack.c @@ -327,7 +327,7 @@ int snd_soc_jack_add_gpios(struct snd_soc_jack *jack, int count, IRQF_TRIGGER_FALLING, gpios[i].name, &gpios[i]); - if (ret) + if (ret < 0) goto err; if (gpios[i].wake) { -- cgit v1.1 From 161d55c3ec4c7e26c96b11dc86caea0b3c9c6b0f Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sat, 13 Aug 2011 11:33:08 +0800 Subject: ASoC: sta32x: Fix a memory leak if snd_soc_register_codec fails Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- sound/soc/codecs/sta32x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/sta32x.c b/sound/soc/codecs/sta32x.c index 409d89d..fbd7eb9 100644 --- a/sound/soc/codecs/sta32x.c +++ b/sound/soc/codecs/sta32x.c @@ -857,6 +857,7 @@ static __devinit int sta32x_i2c_probe(struct i2c_client *i2c, ret = snd_soc_register_codec(&i2c->dev, &sta32x_codec, &sta32x_dai, 1); if (ret != 0) { dev_err(&i2c->dev, "Failed to register codec (%d)\n", ret); + kfree(sta32x); return ret; } -- cgit v1.1 From bf545ed72f2eeac664695a8ea2199d9ddaef6020 Mon Sep 17 00:00:00 2001 From: Scott Jiang Date: Fri, 12 Aug 2011 18:04:10 -0400 Subject: ASoC: ad193x: fix registers definition fix dac word len mask and adc tdm fmt shift value Signed-off-by: Scott Jiang Signed-off-by: Mark Brown Cc: stable@kernel.org --- sound/soc/codecs/ad193x.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/ad193x.h b/sound/soc/codecs/ad193x.h index 9747b54..c1029d2 100644 --- a/sound/soc/codecs/ad193x.h +++ b/sound/soc/codecs/ad193x.h @@ -34,7 +34,7 @@ #define AD193X_DAC_LEFT_HIGH (1 << 3) #define AD193X_DAC_BCLK_INV (1 << 7) #define AD193X_DAC_CTRL2 0x804 -#define AD193X_DAC_WORD_LEN_MASK 0xC +#define AD193X_DAC_WORD_LEN_MASK 0x18 #define AD193X_DAC_MASTER_MUTE 1 #define AD193X_DAC_CHNL_MUTE 0x805 #define AD193X_DACL1_MUTE 0 @@ -63,7 +63,7 @@ #define AD193X_ADC_CTRL1 0x80f #define AD193X_ADC_SERFMT_MASK 0x60 #define AD193X_ADC_SERFMT_STEREO (0 << 5) -#define AD193X_ADC_SERFMT_TDM (1 << 2) +#define AD193X_ADC_SERFMT_TDM (1 << 5) #define AD193X_ADC_SERFMT_AUX (2 << 5) #define AD193X_ADC_WORD_LEN_MASK 0x3 #define AD193X_ADC_CTRL2 0x810 -- cgit v1.1 From 95c93d8525ebce1024bda7316f602ae45c36cd6f Mon Sep 17 00:00:00 2001 From: Scott Jiang Date: Fri, 12 Aug 2011 18:04:11 -0400 Subject: ASoC: ad193x: fix dac word len setting dac word len value should left shift before setting Signed-off-by: Scott Jiang Acked-by: Barry Song <21cnbao@gmail.com> Signed-off-by: Mark Brown Cc: stable@kernel.org --- sound/soc/codecs/ad193x.c | 3 ++- sound/soc/codecs/ad193x.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/ad193x.c b/sound/soc/codecs/ad193x.c index 2374ca5..f1a8be5 100644 --- a/sound/soc/codecs/ad193x.c +++ b/sound/soc/codecs/ad193x.c @@ -307,7 +307,8 @@ static int ad193x_hw_params(struct snd_pcm_substream *substream, snd_soc_write(codec, AD193X_PLL_CLK_CTRL0, reg); reg = snd_soc_read(codec, AD193X_DAC_CTRL2); - reg = (reg & (~AD193X_DAC_WORD_LEN_MASK)) | word_len; + reg = (reg & (~AD193X_DAC_WORD_LEN_MASK)) + | (word_len << AD193X_DAC_WORD_LEN_SHFT); snd_soc_write(codec, AD193X_DAC_CTRL2, reg); reg = snd_soc_read(codec, AD193X_ADC_CTRL1); diff --git a/sound/soc/codecs/ad193x.h b/sound/soc/codecs/ad193x.h index c1029d2..cccc2e8 100644 --- a/sound/soc/codecs/ad193x.h +++ b/sound/soc/codecs/ad193x.h @@ -34,6 +34,7 @@ #define AD193X_DAC_LEFT_HIGH (1 << 3) #define AD193X_DAC_BCLK_INV (1 << 7) #define AD193X_DAC_CTRL2 0x804 +#define AD193X_DAC_WORD_LEN_SHFT 3 #define AD193X_DAC_WORD_LEN_MASK 0x18 #define AD193X_DAC_MASTER_MUTE 1 #define AD193X_DAC_CHNL_MUTE 0x805 -- cgit v1.1 From 25ea524bed0202f823a0adcbbda68e86a22e3670 Mon Sep 17 00:00:00 2001 From: Scott Jiang Date: Fri, 12 Aug 2011 18:04:12 -0400 Subject: ASoC: ad193x: fix system clock system clock is 24.576MHz instead of 12.288MHz Signed-off-by: Scott Jiang Signed-off-by: Mark Brown --- sound/soc/blackfin/bf5xx-ad193x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/blackfin/bf5xx-ad193x.c b/sound/soc/blackfin/bf5xx-ad193x.c index d6651c0..a118a0f 100644 --- a/sound/soc/blackfin/bf5xx-ad193x.c +++ b/sound/soc/blackfin/bf5xx-ad193x.c @@ -56,7 +56,7 @@ static int bf5xx_ad193x_hw_params(struct snd_pcm_substream *substream, switch (params_rate(params)) { case 48000: - clk = 12288000; + clk = 24576000; break; } -- cgit v1.1 From 396a2e79cdbd562bf7ea48132f8d3ba8304109b2 Mon Sep 17 00:00:00 2001 From: Scott Jiang Date: Fri, 12 Aug 2011 18:04:13 -0400 Subject: ASoC: Add spi hw read function for 16 addr 8 data mode for ad193x fix [This will be used by the ad193x driver to fix the fact that the original author of the driver put a bodge for their particular chip into a the generic ASoC register I/O abstraction layer which looked like an obvious bug which ended up getting fixed in 3.0. Sadly there were no comments documenting what was going on. A minimally invasive correction to the driver is to remove the register cache support and go direct to the hardware all the time so we're adding a new feature -- broonie] Signed-off-by: Scott Jiang Signed-off-by: Mark Brown --- sound/soc/soc-io.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/sound/soc/soc-io.c b/sound/soc/soc-io.c index cca490c..a62f7dd 100644 --- a/sound/soc/soc-io.c +++ b/sound/soc/soc-io.c @@ -205,6 +205,25 @@ static unsigned int snd_soc_16_8_read_i2c(struct snd_soc_codec *codec, #define snd_soc_16_8_read_i2c NULL #endif +#if defined(CONFIG_SPI_MASTER) +static unsigned int snd_soc_16_8_read_spi(struct snd_soc_codec *codec, + unsigned int r) +{ + struct spi_device *spi = codec->control_data; + + const u16 reg = cpu_to_be16(r | 0x100); + u8 data; + int ret; + + ret = spi_write_then_read(spi, ®, 2, &data, 1); + if (ret < 0) + return 0; + return data; +} +#else +#define snd_soc_16_8_read_spi NULL +#endif + static int snd_soc_16_8_write(struct snd_soc_codec *codec, unsigned int reg, unsigned int value) { @@ -295,6 +314,7 @@ static struct { int (*write)(struct snd_soc_codec *codec, unsigned int, unsigned int); unsigned int (*read)(struct snd_soc_codec *, unsigned int); unsigned int (*i2c_read)(struct snd_soc_codec *, unsigned int); + unsigned int (*spi_read)(struct snd_soc_codec *, unsigned int); } io_types[] = { { .addr_bits = 4, .data_bits = 12, @@ -318,6 +338,7 @@ static struct { .addr_bits = 16, .data_bits = 8, .write = snd_soc_16_8_write, .i2c_read = snd_soc_16_8_read_i2c, + .spi_read = snd_soc_16_8_read_spi, }, { .addr_bits = 16, .data_bits = 16, @@ -383,6 +404,8 @@ int snd_soc_codec_set_cache_io(struct snd_soc_codec *codec, #ifdef CONFIG_SPI_MASTER codec->hw_write = do_spi_write; #endif + if (io_types[i].spi_read) + codec->hw_read = io_types[i].spi_read; codec->control_data = container_of(codec->dev, struct spi_device, -- cgit v1.1 From 0cc62e926324d4f3bd02d378baafbe73164fca35 Mon Sep 17 00:00:00 2001 From: Scott Jiang Date: Fri, 12 Aug 2011 18:04:14 -0400 Subject: ASoC: ad193x: remove cache support asoc cache layer can't support this kind of spi registers well. remove cache support and read/write registers directly Signed-off-by: Scott Jiang Signed-off-by: Mark Brown --- sound/soc/codecs/ad193x.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/sound/soc/codecs/ad193x.c b/sound/soc/codecs/ad193x.c index f1a8be5..eedb6f5 100644 --- a/sound/soc/codecs/ad193x.c +++ b/sound/soc/codecs/ad193x.c @@ -27,11 +27,6 @@ struct ad193x_priv { int sysclk; }; -/* ad193x register cache & default register settings */ -static const u8 ad193x_reg[AD193X_NUM_REGS] = { - 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, -}; - /* * AD193X volume/mute/de-emphasis etc. controls */ @@ -390,9 +385,6 @@ static int ad193x_probe(struct snd_soc_codec *codec) static struct snd_soc_codec_driver soc_codec_dev_ad193x = { .probe = ad193x_probe, - .reg_cache_default = ad193x_reg, - .reg_cache_size = AD193X_NUM_REGS, - .reg_word_size = sizeof(u16), }; #if defined(CONFIG_SPI_MASTER) -- cgit v1.1 From 8a9af4fdf6d5eeb3200a088354d266a87e8260b0 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Tue, 9 Aug 2011 16:31:54 -0700 Subject: USB: Avoid NULL pointer deref in usb_hcd_alloc_bandwidth. usb_ifnum_to_if() can return NULL if the USB device does not have a configuration installed (usb_device->actconfig == NULL), or if we can't find the interface number in the installed configuration. Return an error instead of crashing. Signed-off-by: Sarah Sharp --- drivers/usb/core/hcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 8669ba3..73cbbd8 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1775,6 +1775,8 @@ int usb_hcd_alloc_bandwidth(struct usb_device *udev, struct usb_interface *iface = usb_ifnum_to_if(udev, cur_alt->desc.bInterfaceNumber); + if (!iface) + return -EINVAL; if (iface->resetting_device) { /* * The USB core just reset the device, so the xHCI host -- cgit v1.1 From 75f25bd31d9315ab57e4fb5eba3340452febc48d Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Wed, 3 Aug 2011 13:17:01 +0800 Subject: cpupower: avoid using symlinks Reference the source directly, don't create symlinks. Signed-off-by: WANG Cong Signed-off-by: Dominik Brodowski --- tools/power/cpupower/debug/x86_64/Makefile | 8 ++++---- tools/power/cpupower/debug/x86_64/centrino-decode.c | 1 - tools/power/cpupower/debug/x86_64/powernow-k8-decode.c | 1 - 3 files changed, 4 insertions(+), 6 deletions(-) delete mode 120000 tools/power/cpupower/debug/x86_64/centrino-decode.c delete mode 120000 tools/power/cpupower/debug/x86_64/powernow-k8-decode.c diff --git a/tools/power/cpupower/debug/x86_64/Makefile b/tools/power/cpupower/debug/x86_64/Makefile index dbf1399..3326217 100644 --- a/tools/power/cpupower/debug/x86_64/Makefile +++ b/tools/power/cpupower/debug/x86_64/Makefile @@ -1,10 +1,10 @@ default: all -centrino-decode: centrino-decode.c - $(CC) $(CFLAGS) -o centrino-decode centrino-decode.c +centrino-decode: ../i386/centrino-decode.c + $(CC) $(CFLAGS) -o $@ $< -powernow-k8-decode: powernow-k8-decode.c - $(CC) $(CFLAGS) -o powernow-k8-decode powernow-k8-decode.c +powernow-k8-decode: ../i386/powernow-k8-decode.c + $(CC) $(CFLAGS) -o $@ $< all: centrino-decode powernow-k8-decode diff --git a/tools/power/cpupower/debug/x86_64/centrino-decode.c b/tools/power/cpupower/debug/x86_64/centrino-decode.c deleted file mode 120000 index 26fb3f1..0000000 --- a/tools/power/cpupower/debug/x86_64/centrino-decode.c +++ /dev/null @@ -1 +0,0 @@ -../i386/centrino-decode.c \ No newline at end of file diff --git a/tools/power/cpupower/debug/x86_64/powernow-k8-decode.c b/tools/power/cpupower/debug/x86_64/powernow-k8-decode.c deleted file mode 120000 index eb30c79..0000000 --- a/tools/power/cpupower/debug/x86_64/powernow-k8-decode.c +++ /dev/null @@ -1 +0,0 @@ -../i386/powernow-k8-decode.c \ No newline at end of file -- cgit v1.1 From 2dfc818b35cbea59188cc86e86e0a0efce2b0dbe Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 12 Aug 2011 01:11:35 +0200 Subject: cpupower: mperf monitor - Use TSC to calculate max frequency if possible Which makes the implementation independent from cpufreq drivers. Therefore this would also work on a Xen kernel where the hypervisor is doing frequency switching and idle entering. Signed-off-by: Thomas Renninger Signed-off-by: Dominik Brodowski --- tools/power/cpupower/Makefile | 2 +- .../cpupower/utils/idle_monitor/mperf_monitor.c | 177 +++++++++++++++------ 2 files changed, 131 insertions(+), 48 deletions(-) diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 94c2cf0..11521d2 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -24,7 +24,7 @@ # Set the following to `true' to make a unstripped, unoptimized # binary. Leave this set to `false' for production use. -DEBUG ?= false +DEBUG ?= true # make the build silent. Set this to something else to make it noisy again. V ?= false diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c index 63ca87a..5650ab5 100644 --- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c @@ -22,12 +22,15 @@ #define MSR_TSC 0x10 +#define MSR_AMD_HWCR 0xc0010015 + enum mperf_id { C0 = 0, Cx, AVG_FREQ, MPERF_CSTATE_COUNT }; static int mperf_get_count_percent(unsigned int self_id, double *percent, unsigned int cpu); static int mperf_get_count_freq(unsigned int id, unsigned long long *count, unsigned int cpu); +static struct timespec time_start, time_end; static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = { { @@ -54,19 +57,33 @@ static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = { }, }; +enum MAX_FREQ_MODE { MAX_FREQ_SYSFS, MAX_FREQ_TSC_REF }; +static int max_freq_mode; +/* + * The max frequency mperf is ticking at (in C0), either retrieved via: + * 1) calculated after measurements if we know TSC ticks at mperf/P0 frequency + * 2) cpufreq /sys/devices/.../cpu0/cpufreq/cpuinfo_max_freq at init time + * 1. Is preferred as it also works without cpufreq subsystem (e.g. on Xen) + */ +static unsigned long max_frequency; + static unsigned long long tsc_at_measure_start; static unsigned long long tsc_at_measure_end; -static unsigned long max_frequency; static unsigned long long *mperf_previous_count; static unsigned long long *aperf_previous_count; static unsigned long long *mperf_current_count; static unsigned long long *aperf_current_count; + /* valid flag for all CPUs. If a MSR read failed it will be zero */ static int *is_valid; static int mperf_get_tsc(unsigned long long *tsc) { - return read_msr(0, MSR_TSC, tsc); + int ret; + ret = read_msr(0, MSR_TSC, tsc); + if (ret) + dprint("Reading TSC MSR failed, returning %llu\n", *tsc); + return ret; } static int mperf_init_stats(unsigned int cpu) @@ -97,36 +114,11 @@ static int mperf_measure_stats(unsigned int cpu) return 0; } -/* - * get_average_perf() - * - * Returns the average performance (also considers boosted frequencies) - * - * Input: - * aperf_diff: Difference of the aperf register over a time period - * mperf_diff: Difference of the mperf register over the same time period - * max_freq: Maximum frequency (P0) - * - * Returns: - * Average performance over the time period - */ -static unsigned long get_average_perf(unsigned long long aperf_diff, - unsigned long long mperf_diff) -{ - unsigned int perf_percent = 0; - if (((unsigned long)(-1) / 100) < aperf_diff) { - int shift_count = 7; - aperf_diff >>= shift_count; - mperf_diff >>= shift_count; - } - perf_percent = (aperf_diff * 100) / mperf_diff; - return (max_frequency * perf_percent) / 100; -} - static int mperf_get_count_percent(unsigned int id, double *percent, unsigned int cpu) { unsigned long long aperf_diff, mperf_diff, tsc_diff; + unsigned long long timediff; if (!is_valid[cpu]) return -1; @@ -136,11 +128,19 @@ static int mperf_get_count_percent(unsigned int id, double *percent, mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu]; aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu]; - tsc_diff = tsc_at_measure_end - tsc_at_measure_start; - *percent = 100.0 * mperf_diff / tsc_diff; - dprint("%s: mperf_diff: %llu, tsc_diff: %llu\n", - mperf_cstates[id].name, mperf_diff, tsc_diff); + if (max_freq_mode == MAX_FREQ_TSC_REF) { + tsc_diff = tsc_at_measure_end - tsc_at_measure_start; + *percent = 100.0 * mperf_diff / tsc_diff; + dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n", + mperf_cstates[id].name, mperf_diff, tsc_diff); + } else if (max_freq_mode == MAX_FREQ_SYSFS) { + timediff = timespec_diff_us(time_start, time_end); + *percent = 100.0 * mperf_diff / timediff; + dprint("%s: MAXFREQ - mperf_diff: %llu, time_diff: %llu\n", + mperf_cstates[id].name, mperf_diff, timediff); + } else + return -1; if (id == Cx) *percent = 100.0 - *percent; @@ -154,7 +154,7 @@ static int mperf_get_count_percent(unsigned int id, double *percent, static int mperf_get_count_freq(unsigned int id, unsigned long long *count, unsigned int cpu) { - unsigned long long aperf_diff, mperf_diff; + unsigned long long aperf_diff, mperf_diff, time_diff, tsc_diff; if (id != AVG_FREQ) return 1; @@ -165,11 +165,21 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count, mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu]; aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu]; - /* Return MHz for now, might want to return KHz if column width is more - generic */ - *count = get_average_perf(aperf_diff, mperf_diff) / 1000; - dprint("%s: %llu\n", mperf_cstates[id].name, *count); + if (max_freq_mode == MAX_FREQ_TSC_REF) { + /* Calculate max_freq from TSC count */ + tsc_diff = tsc_at_measure_end - tsc_at_measure_start; + time_diff = timespec_diff_us(time_start, time_end); + max_frequency = tsc_diff / time_diff; + } + *count = max_frequency * ((double)aperf_diff / mperf_diff); + dprint("%s: Average freq based on %s maximum frequency:\n", + mperf_cstates[id].name, + (max_freq_mode == MAX_FREQ_TSC_REF) ? "TSC calculated" : "sysfs read"); + dprint("%max_frequency: %lu", max_frequency); + dprint("aperf_diff: %llu\n", aperf_diff); + dprint("mperf_diff: %llu\n", mperf_diff); + dprint("avg freq: %llu\n", *count); return 0; } @@ -178,6 +188,7 @@ static int mperf_start(void) int cpu; unsigned long long dbg; + clock_gettime(CLOCK_REALTIME, &time_start); mperf_get_tsc(&tsc_at_measure_start); for (cpu = 0; cpu < cpu_count; cpu++) @@ -193,32 +204,104 @@ static int mperf_stop(void) unsigned long long dbg; int cpu; - mperf_get_tsc(&tsc_at_measure_end); - for (cpu = 0; cpu < cpu_count; cpu++) mperf_measure_stats(cpu); + mperf_get_tsc(&tsc_at_measure_end); + clock_gettime(CLOCK_REALTIME, &time_end); + mperf_get_tsc(&dbg); dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end); return 0; } -struct cpuidle_monitor mperf_monitor; - -struct cpuidle_monitor *mperf_register(void) +/* + * Mperf register is defined to tick at P0 (maximum) frequency + * + * Instead of reading out P0 which can be tricky to read out from HW, + * we use TSC counter if it reliably ticks at P0/mperf frequency. + * + * Still try to fall back to: + * /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq + * on older Intel HW without invariant TSC feature. + * Or on AMD machines where TSC does not tick at P0 (do not exist yet, but + * it's still double checked (MSR_AMD_HWCR)). + * + * On these machines the user would still get useful mperf + * stats when acpi-cpufreq driver is loaded. + */ +static int init_maxfreq_mode(void) { + int ret; + unsigned long long hwcr; unsigned long min; - if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF)) - return NULL; - - /* Assume min/max all the same on all cores */ + if (!cpupower_cpu_info.caps & CPUPOWER_CAP_INV_TSC) + goto use_sysfs; + + if (cpupower_cpu_info.vendor == X86_VENDOR_AMD) { + /* MSR_AMD_HWCR tells us whether TSC runs at P0/mperf + * freq. + * A test whether hwcr is accessable/available would be: + * (cpupower_cpu_info.family > 0x10 || + * cpupower_cpu_info.family == 0x10 && + * cpupower_cpu_info.model >= 0x2)) + * This should be the case for all aperf/mperf + * capable AMD machines and is therefore safe to test here. + * Compare with Linus kernel git commit: acf01734b1747b1ec4 + */ + ret = read_msr(0, MSR_AMD_HWCR, &hwcr); + /* + * If the MSR read failed, assume a Xen system that did + * not explicitly provide access to it and assume TSC works + */ + if (ret != 0) { + dprint("TSC read 0x%x failed - assume TSC working\n", + MSR_AMD_HWCR); + return 0; + } else if (1 & (hwcr >> 24)) { + max_freq_mode = MAX_FREQ_TSC_REF; + return 0; + } else { /* Use sysfs max frequency if available */ } + } else if (cpupower_cpu_info.vendor == X86_VENDOR_INTEL) { + /* + * On Intel we assume mperf (in C0) is ticking at same + * rate than TSC + */ + max_freq_mode = MAX_FREQ_TSC_REF; + return 0; + } +use_sysfs: if (cpufreq_get_hardware_limits(0, &min, &max_frequency)) { dprint("Cannot retrieve max freq from cpufreq kernel " "subsystem\n"); - return NULL; + return -1; } + max_freq_mode = MAX_FREQ_SYSFS; + return 0; +} + +/* + * This monitor provides: + * + * 1) Average frequency a CPU resided in + * This always works if the CPU has aperf/mperf capabilities + * + * 2) C0 and Cx (any sleep state) time a CPU resided in + * Works if mperf timer stops ticking in sleep states which + * seem to be the case on all current HW. + * Both is directly retrieved from HW registers and is independent + * from kernel statistics. + */ +struct cpuidle_monitor mperf_monitor; +struct cpuidle_monitor *mperf_register(void) +{ + if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF)) + return NULL; + + if (init_maxfreq_mode()) + return NULL; /* Free this at program termination */ is_valid = calloc(cpu_count, sizeof(int)); -- cgit v1.1 From 88f984e0e235f82a5d34f4a99244eeb14e1413e0 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 12 Aug 2011 01:11:36 +0200 Subject: cpupower: Do not show an empty Idle_Stats monitor if no idle driver is available By taking error values of: sysfs_get_idlestate_count(..); into account. Signed-off-by: Thomas Renninger Signed-off-by: Dominik Brodowski --- tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c index d048b96..bcd22a1 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c +++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -134,7 +134,7 @@ static struct cpuidle_monitor *cpuidle_register(void) /* Assume idle state count is the same for all CPUs */ cpuidle_sysfs_monitor.hw_states_num = sysfs_get_idlestate_count(0); - if (cpuidle_sysfs_monitor.hw_states_num == 0) + if (cpuidle_sysfs_monitor.hw_states_num <= 0) return NULL; for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) { -- cgit v1.1 From 7c74d2bc5a9d43d33d6f16c1e706147162e2bc52 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 12 Aug 2011 01:11:37 +0200 Subject: cpupower: Better detect offlined CPUs Before, checking for offlined CPUs was done dirty and it was checked whether topology parsing returned -1 values. But this is a valid case on a Xen (and possibly other) kernels. Do proper online/offline checking, also take CONFIG_HOTPLUG_CPU option into account (no /sys/devices/../cpuX/online file). Signed-off-by: Thomas Renninger Signed-off-by: Dominik Brodowski --- tools/power/cpupower/utils/helpers/helpers.h | 3 ++ tools/power/cpupower/utils/helpers/sysfs.c | 50 ++++++++++++++++++++++ tools/power/cpupower/utils/helpers/sysfs.h | 2 + tools/power/cpupower/utils/helpers/topology.c | 5 ++- .../cpupower/utils/idle_monitor/cpupower-monitor.c | 10 +++-- 5 files changed, 66 insertions(+), 4 deletions(-) diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 592ee36..7a83022 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -96,6 +96,9 @@ struct cpupower_topology { int pkg; int core; int cpu; + + /* flags */ + unsigned int is_online:1; } *core_info; }; diff --git a/tools/power/cpupower/utils/helpers/sysfs.c b/tools/power/cpupower/utils/helpers/sysfs.c index 55e2466..c634302 100644 --- a/tools/power/cpupower/utils/helpers/sysfs.c +++ b/tools/power/cpupower/utils/helpers/sysfs.c @@ -56,6 +56,56 @@ static unsigned int sysfs_write_file(const char *path, return (unsigned int) numwrite; } +/* + * Detect whether a CPU is online + * + * Returns: + * 1 -> if CPU is online + * 0 -> if CPU is offline + * negative errno values in error case + */ +int sysfs_is_cpu_online(unsigned int cpu) +{ + char path[SYSFS_PATH_MAX]; + int fd; + ssize_t numread; + unsigned long long value; + char linebuf[MAX_LINE_LEN]; + char *endp; + struct stat statbuf; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u", cpu); + + if (stat(path, &statbuf) != 0) + return 0; + + /* + * kernel without CONFIG_HOTPLUG_CPU + * -> cpuX directory exists, but not cpuX/online file + */ + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/online", cpu); + if (stat(path, &statbuf) != 0) + return 1; + + fd = open(path, O_RDONLY); + if (fd == -1) + return -errno; + + numread = read(fd, linebuf, MAX_LINE_LEN - 1); + if (numread < 1) { + close(fd); + return -EIO; + } + linebuf[numread] = '\0'; + close(fd); + + value = strtoull(linebuf, &endp, 0); + if (value > 1 || value < 0) + return -EINVAL; + + return value; +} + /* CPUidle idlestate specific /sys/devices/system/cpu/cpuX/cpuidle/ access */ /* diff --git a/tools/power/cpupower/utils/helpers/sysfs.h b/tools/power/cpupower/utils/helpers/sysfs.h index f9373e0..8cb797b 100644 --- a/tools/power/cpupower/utils/helpers/sysfs.h +++ b/tools/power/cpupower/utils/helpers/sysfs.h @@ -7,6 +7,8 @@ extern unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen); +extern int sysfs_is_cpu_online(unsigned int cpu); + extern unsigned long sysfs_get_idlestate_latency(unsigned int cpu, unsigned int idlestate); extern unsigned long sysfs_get_idlestate_usage(unsigned int cpu, diff --git a/tools/power/cpupower/utils/helpers/topology.c b/tools/power/cpupower/utils/helpers/topology.c index 385ee5c..4eae2c4 100644 --- a/tools/power/cpupower/utils/helpers/topology.c +++ b/tools/power/cpupower/utils/helpers/topology.c @@ -41,6 +41,8 @@ struct cpuid_core_info { unsigned int pkg; unsigned int thread; unsigned int cpu; + /* flags */ + unsigned int is_online:1; }; static int __compare(const void *t1, const void *t2) @@ -78,6 +80,8 @@ int get_cpu_topology(struct cpupower_topology *cpu_top) return -ENOMEM; cpu_top->pkgs = cpu_top->cores = 0; for (cpu = 0; cpu < cpus; cpu++) { + cpu_top->core_info[cpu].cpu = cpu; + cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu); cpu_top->core_info[cpu].pkg = sysfs_topology_read_file(cpu, "physical_package_id"); if ((int)cpu_top->core_info[cpu].pkg != -1 && @@ -85,7 +89,6 @@ int get_cpu_topology(struct cpupower_topology *cpu_top) cpu_top->pkgs = cpu_top->core_info[cpu].pkg; cpu_top->core_info[cpu].core = sysfs_topology_read_file(cpu, "core_id"); - cpu_top->core_info[cpu].cpu = cpu; } cpu_top->pkgs++; diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index ba4bf06..dd8e1ea 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -190,9 +190,13 @@ void print_results(int topology_depth, int cpu) } } } - /* cpu offline */ - if (cpu_top.core_info[cpu].pkg == -1 || - cpu_top.core_info[cpu].core == -1) { + /* + * The monitor could still provide useful data, for example + * AMD HW counters partly sit in PCI config space. + * It's up to the monitor plug-in to check .is_online, this one + * is just for additional info. + */ + if (!cpu_top.core_info[cpu].is_online) { printf(_(" *is offline\n")); return; } else -- cgit v1.1 From 9ee31f618a3c8209b2bd4bedd71fd5f2be7786bd Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 12 Aug 2011 01:11:38 +0200 Subject: cpupower: Make monitor command -c/--cpu aware This allows for example: cpupower -c 2-4,6 monitor -m Mperf |Mperf PKG |CORE|CPU | C0 | Cx | Freq 0| 8| 4| 2.42| 97.58| 1353 0| 16| 2| 14.38| 85.62| 1928 0| 24| 6| 1.76| 98.24| 1442 1| 16| 3| 15.53| 84.47| 1650 CPUs always get resorted for package, core then cpu id if it could get read out (or however you name these topology levels...). Still this is a nice way to keep the overview if a test binary is bound to a specific CPU or if one wants to show all CPUs inside a package or similar. Still missing: Do not measure not available cores to reduce the overhead and achieve better results. Signed-off-by: Thomas Renninger Signed-off-by: Dominik Brodowski --- tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index dd8e1ea..6cb8d9e 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -149,6 +149,10 @@ void print_results(int topology_depth, int cpu) unsigned long long result; cstate_t s; + /* Be careful CPUs may got resorted for pkg value do not just use cpu */ + if (!bitmask_isbitset(cpus_chosen, cpu_top.core_info[cpu].cpu)) + return; + if (topology_depth > 2) printf("%4d|", cpu_top.core_info[cpu].pkg); if (topology_depth > 1) @@ -389,6 +393,10 @@ int cmd_monitor(int argc, char **argv) return EXIT_FAILURE; } + /* Default is: monitor all CPUs */ + if (bitmask_isallclear(cpus_chosen)) + bitmask_setall(cpus_chosen); + dprint("System has up to %d CPU cores\n", cpu_count); for (num = 0; all_monitors[num]; num++) { -- cgit v1.1 From 4853abaae7e4a2af938115ce9071ef8684fb7af4 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Mon, 15 Aug 2011 21:37:25 +0200 Subject: block: fix flush machinery for stacking drivers with differring flush flags Commit ae1b1539622fb46e51b4d13b3f9e5f4c713f86ae, block: reimplement FLUSH/FUA to support merge, introduced a performance regression when running any sort of fsyncing workload using dm-multipath and certain storage (in our case, an HP EVA). The test I ran was fs_mark, and it dropped from ~800 files/sec on ext4 to ~100 files/sec. It turns out that dm-multipath always advertised flush+fua support, and passed commands on down the stack, where those flags used to get stripped off. The above commit changed that behavior: static inline struct request *__elv_next_request(struct request_queue *q) { struct request *rq; while (1) { - while (!list_empty(&q->queue_head)) { + if (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); - if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) || - (rq->cmd_flags & REQ_FLUSH_SEQ)) - return rq; - rq = blk_do_flush(q, rq); - if (rq) - return rq; + return rq; } Note that previously, a command would come in here, have REQ_FLUSH|REQ_FUA set, and then get handed off to blk_do_flush: struct request *blk_do_flush(struct request_queue *q, struct request *rq) { unsigned int fflags = q->flush_flags; /* may change, cache it */ bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA; bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH); bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA); unsigned skip = 0; ... if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) { rq->cmd_flags &= ~REQ_FLUSH; if (!has_fua) rq->cmd_flags &= ~REQ_FUA; return rq; } So, the flush machinery was bypassed in such cases (q->flush_flags == 0 && rq->cmd_flags & (REQ_FLUSH|REQ_FUA)). Now, however, we don't get into the flush machinery at all. Instead, __elv_next_request just hands a request with flush and fua bits set to the scsi_request_fn, even if the underlying request_queue does not support flush or fua. The agreed upon approach is to fix the flush machinery to allow stacking. While this isn't used in practice (since there is only one request-based dm target, and that target will now reflect the flush flags of the underlying device), it does future-proof the solution, and make it function as designed. In order to make this work, I had to add a field to the struct request, inside the flush structure (to store the original req->end_io). Shaohua had suggested overloading the union with rb_node and completion_data, but the completion data is used by device mapper and can also be used by other drivers. So, I didn't see a way around the additional field. I tested this patch on an HP EVA with both ext4 and xfs, and it recovers the lost performance. Comments and other testers, as always, are appreciated. Cheers, Jeff Signed-off-by: Jeff Moyer Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 8 ++++++-- block/blk-flush.c | 20 ++++++++++++++++---- block/blk.h | 2 ++ include/linux/blkdev.h | 1 + 4 files changed, 25 insertions(+), 6 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index b850bed..7c59b0f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1700,6 +1700,7 @@ EXPORT_SYMBOL_GPL(blk_rq_check_limits); int blk_insert_cloned_request(struct request_queue *q, struct request *rq) { unsigned long flags; + int where = ELEVATOR_INSERT_BACK; if (blk_rq_check_limits(q, rq)) return -EIO; @@ -1716,7 +1717,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) */ BUG_ON(blk_queued_rq(rq)); - add_acct_request(q, rq, ELEVATOR_INSERT_BACK); + if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA)) + where = ELEVATOR_INSERT_FLUSH; + + add_acct_request(q, rq, where); spin_unlock_irqrestore(q->queue_lock, flags); return 0; @@ -2273,7 +2277,7 @@ static bool blk_end_bidi_request(struct request *rq, int error, * %false - we are done with this request * %true - still buffers pending for this request **/ -static bool __blk_end_bidi_request(struct request *rq, int error, +bool __blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes) { if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) diff --git a/block/blk-flush.c b/block/blk-flush.c index 2d162bd..491eb30 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -123,7 +123,7 @@ static void blk_flush_restore_request(struct request *rq) /* make @rq a normal request */ rq->cmd_flags &= ~REQ_FLUSH_SEQ; - rq->end_io = NULL; + rq->end_io = rq->flush.saved_end_io; } /** @@ -301,9 +301,6 @@ void blk_insert_flush(struct request *rq) unsigned int fflags = q->flush_flags; /* may change, cache */ unsigned int policy = blk_flush_policy(fflags, rq); - BUG_ON(rq->end_io); - BUG_ON(!rq->bio || rq->bio != rq->biotail); - /* * @policy now records what operations need to be done. Adjust * REQ_FLUSH and FUA for the driver. @@ -313,6 +310,19 @@ void blk_insert_flush(struct request *rq) rq->cmd_flags &= ~REQ_FUA; /* + * An empty flush handed down from a stacking driver may + * translate into nothing if the underlying device does not + * advertise a write-back cache. In this case, simply + * complete the request. + */ + if (!policy) { + __blk_end_bidi_request(rq, 0, 0, 0); + return; + } + + BUG_ON(!rq->bio || rq->bio != rq->biotail); + + /* * If there's data but flush is not necessary, the request can be * processed directly without going through flush machinery. Queue * for normal execution. @@ -320,6 +330,7 @@ void blk_insert_flush(struct request *rq) if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { list_add_tail(&rq->queuelist, &q->queue_head); + blk_run_queue_async(q); return; } @@ -330,6 +341,7 @@ void blk_insert_flush(struct request *rq) memset(&rq->flush, 0, sizeof(rq->flush)); INIT_LIST_HEAD(&rq->flush.list); rq->cmd_flags |= REQ_FLUSH_SEQ; + rq->flush.saved_end_io = rq->end_io; /* Usually NULL */ rq->end_io = flush_data_end_io; blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); diff --git a/block/blk.h b/block/blk.h index d658628..20b900a 100644 --- a/block/blk.h +++ b/block/blk.h @@ -17,6 +17,8 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq, struct bio *bio); void blk_dequeue_request(struct request *rq); void __blk_queue_free_tags(struct request_queue *q); +bool __blk_end_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, unsigned int bidi_bytes); void blk_rq_timed_out_timer(unsigned long data); void blk_delete_timer(struct request *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8479285..84b15d5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -118,6 +118,7 @@ struct request { struct { unsigned int seq; struct list_head list; + rq_end_io_fn *saved_end_io; } flush; }; -- cgit v1.1 From 795858dbd253462a67e14272edeaae73c6074b17 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 15 Aug 2011 13:02:37 -0700 Subject: ceph: fix encoding of ino only (not relative) paths A 'path' consists of a starting ino and relative component. Encode even when there is no relative component. This is primarily needed by the NFS reexport code. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index fee028b..86c59e1 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1595,7 +1595,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath); dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, *ppath); - } else if (rpath) { + } else if (rpath || rino) { *ino = rino; *ppath = rpath; *pathlen = strlen(rpath); -- cgit v1.1 From 18adad1c57f820d38d05e3d5e3d548e286233b76 Mon Sep 17 00:00:00 2001 From: Gerard Braad Date: Tue, 16 Aug 2011 00:17:56 -0700 Subject: Input: wacom - add support for the Wacom Bamboo Pen (CTL-660/K) Signed-off-by: Gerard Braad Reviewed-by: Chris Bagwell Signed-off-by: Ping Cheng Signed-off-by: Dmitry Torokhov --- drivers/input/tablet/wacom_wac.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index 03ebcc8..c1c2f7b 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -1460,6 +1460,9 @@ static const struct wacom_features wacom_features_0xD3 = static const struct wacom_features wacom_features_0xD4 = { "Wacom Bamboo Pen", WACOM_PKGLEN_BBFUN, 14720, 9200, 1023, 63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; +static const struct wacom_features wacom_features_0xD5 = + { "Wacom Bamboo Pen 6x8", WACOM_PKGLEN_BBFUN, 21648, 13530, 1023, + 63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; static const struct wacom_features wacom_features_0xD6 = { "Wacom BambooPT 2FG 4x5", WACOM_PKGLEN_BBFUN, 14720, 9200, 1023, 63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; @@ -1564,6 +1567,7 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0xD2) }, { USB_DEVICE_WACOM(0xD3) }, { USB_DEVICE_WACOM(0xD4) }, + { USB_DEVICE_WACOM(0xD5) }, { USB_DEVICE_WACOM(0xD6) }, { USB_DEVICE_WACOM(0xD7) }, { USB_DEVICE_WACOM(0xD8) }, -- cgit v1.1 From a417ea4432db7fd1c91c19b129a3e3d2367b7ce4 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Tue, 16 Aug 2011 00:17:56 -0700 Subject: Input: wacom - add WAC_MSG_RETRIES define Use WAC_MSG_RETRIES define instead of a numeric constant. Signed-off-by: Ping Cheng Signed-off-by: Dmitry Torokhov --- drivers/input/tablet/wacom_sys.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c index 449c0a4..9879c73 100644 --- a/drivers/input/tablet/wacom_sys.c +++ b/drivers/input/tablet/wacom_sys.c @@ -49,6 +49,7 @@ struct hid_descriptor { #define USB_REQ_GET_REPORT 0x01 #define USB_REQ_SET_REPORT 0x09 #define WAC_HID_FEATURE_REPORT 0x03 +#define WAC_MSG_RETRIES 5 static int usb_get_report(struct usb_interface *intf, unsigned char type, unsigned char id, void *buf, int size) @@ -165,7 +166,7 @@ static int wacom_parse_hid(struct usb_interface *intf, struct hid_descriptor *hi report, hid_desc->wDescriptorLength, 5000); /* 5 secs */ - } while (result < 0 && limit++ < 5); + } while (result < 0 && limit++ < WAC_MSG_RETRIES); /* No need to parse the Descriptor. It isn't an error though */ if (result < 0) @@ -336,7 +337,7 @@ static int wacom_query_tablet_data(struct usb_interface *intf, struct wacom_feat error = usb_get_report(intf, WAC_HID_FEATURE_REPORT, report_id, rep_data, 3); - } while ((error < 0 || rep_data[1] != 4) && limit++ < 5); + } while ((error < 0 || rep_data[1] != 4) && limit++ < WAC_MSG_RETRIES); } else if (features->type != TABLETPC) { do { rep_data[0] = 2; @@ -347,7 +348,7 @@ static int wacom_query_tablet_data(struct usb_interface *intf, struct wacom_feat error = usb_get_report(intf, WAC_HID_FEATURE_REPORT, report_id, rep_data, 2); - } while ((error < 0 || rep_data[1] != 2) && limit++ < 5); + } while ((error < 0 || rep_data[1] != 2) && limit++ < WAC_MSG_RETRIES); } kfree(rep_data); -- cgit v1.1 From 3b48c91cdf2d6827ce315b3b112310fa02198db0 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Tue, 16 Aug 2011 00:17:57 -0700 Subject: Input: wacom - report id 3 returns 4 bytes of data Signed-off-by: Ping Cheng Signed-off-by: Dmitry Torokhov --- drivers/input/tablet/wacom_sys.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c index 9879c73..d27c9d9 100644 --- a/drivers/input/tablet/wacom_sys.c +++ b/drivers/input/tablet/wacom_sys.c @@ -320,23 +320,25 @@ static int wacom_query_tablet_data(struct usb_interface *intf, struct wacom_feat int limit = 0, report_id = 2; int error = -ENOMEM; - rep_data = kmalloc(2, GFP_KERNEL); + rep_data = kmalloc(4, GFP_KERNEL); if (!rep_data) return error; - /* ask to report tablet data if it is 2FGT Tablet PC or + /* ask to report tablet data if it is MT Tablet PC or * not a Tablet PC */ if (features->type == TABLETPC2FG) { do { rep_data[0] = 3; rep_data[1] = 4; + rep_data[2] = 0; + rep_data[3] = 0; report_id = 3; error = usb_set_report(intf, WAC_HID_FEATURE_REPORT, - report_id, rep_data, 2); + report_id, rep_data, 4); if (error >= 0) error = usb_get_report(intf, WAC_HID_FEATURE_REPORT, report_id, - rep_data, 3); + rep_data, 4); } while ((error < 0 || rep_data[1] != 4) && limit++ < WAC_MSG_RETRIES); } else if (features->type != TABLETPC) { do { -- cgit v1.1 From c503ad466da44ca23c658986629bf7a2e2eabbb7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Aug 2011 14:23:20 +0200 Subject: ALSA: hda - Fix duplicated capture-volume creation for ALC268 models Fix the duplicated creation of capture-mixer elements for some static ALC268 configurations. The capture mixers must be put to cap_mixer field instead of mixers array. Signed-off-by: Takashi Iwai --- sound/pci/hda/alc268_quirks.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/sound/pci/hda/alc268_quirks.c b/sound/pci/hda/alc268_quirks.c index be58bf2..2e5876c 100644 --- a/sound/pci/hda/alc268_quirks.c +++ b/sound/pci/hda/alc268_quirks.c @@ -476,8 +476,8 @@ static const struct snd_pci_quirk alc268_ssid_cfg_tbl[] = { static const struct alc_config_preset alc268_presets[] = { [ALC267_QUANTA_IL1] = { - .mixers = { alc267_quanta_il1_mixer, alc268_beep_mixer, - alc268_capture_nosrc_mixer }, + .mixers = { alc267_quanta_il1_mixer, alc268_beep_mixer }, + .cap_mixer = alc268_capture_nosrc_mixer, .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, alc267_quanta_il1_verbs }, .num_dacs = ARRAY_SIZE(alc268_dac_nids), @@ -492,8 +492,8 @@ static const struct alc_config_preset alc268_presets[] = { .init_hook = alc_inithook, }, [ALC268_3ST] = { - .mixers = { alc268_base_mixer, alc268_capture_alt_mixer, - alc268_beep_mixer }, + .mixers = { alc268_base_mixer, alc268_beep_mixer }, + .cap_mixer = alc268_capture_alt_mixer, .init_verbs = { alc268_base_init_verbs }, .num_dacs = ARRAY_SIZE(alc268_dac_nids), .dac_nids = alc268_dac_nids, @@ -507,8 +507,8 @@ static const struct alc_config_preset alc268_presets[] = { .input_mux = &alc268_capture_source, }, [ALC268_TOSHIBA] = { - .mixers = { alc268_toshiba_mixer, alc268_capture_alt_mixer, - alc268_beep_mixer }, + .mixers = { alc268_toshiba_mixer, alc268_beep_mixer }, + .cap_mixer = alc268_capture_alt_mixer, .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, alc268_toshiba_verbs }, .num_dacs = ARRAY_SIZE(alc268_dac_nids), @@ -525,8 +525,8 @@ static const struct alc_config_preset alc268_presets[] = { .init_hook = alc_inithook, }, [ALC268_ACER] = { - .mixers = { alc268_acer_mixer, alc268_capture_alt_mixer, - alc268_beep_mixer }, + .mixers = { alc268_acer_mixer, alc268_beep_mixer }, + .cap_mixer = alc268_capture_alt_mixer, .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, alc268_acer_verbs }, .num_dacs = ARRAY_SIZE(alc268_dac_nids), @@ -543,8 +543,8 @@ static const struct alc_config_preset alc268_presets[] = { .init_hook = alc_inithook, }, [ALC268_ACER_DMIC] = { - .mixers = { alc268_acer_dmic_mixer, alc268_capture_alt_mixer, - alc268_beep_mixer }, + .mixers = { alc268_acer_dmic_mixer, alc268_beep_mixer }, + .cap_mixer = alc268_capture_alt_mixer, .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, alc268_acer_verbs }, .num_dacs = ARRAY_SIZE(alc268_dac_nids), @@ -561,9 +561,8 @@ static const struct alc_config_preset alc268_presets[] = { .init_hook = alc_inithook, }, [ALC268_ACER_ASPIRE_ONE] = { - .mixers = { alc268_acer_aspire_one_mixer, - alc268_beep_mixer, - alc268_capture_nosrc_mixer }, + .mixers = { alc268_acer_aspire_one_mixer, alc268_beep_mixer}, + .cap_mixer = alc268_capture_nosrc_mixer, .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, alc268_acer_aspire_one_verbs }, .num_dacs = ARRAY_SIZE(alc268_dac_nids), @@ -579,8 +578,8 @@ static const struct alc_config_preset alc268_presets[] = { .init_hook = alc_inithook, }, [ALC268_DELL] = { - .mixers = { alc268_dell_mixer, alc268_beep_mixer, - alc268_capture_nosrc_mixer }, + .mixers = { alc268_dell_mixer, alc268_beep_mixer}, + .cap_mixer = alc268_capture_nosrc_mixer, .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, alc268_dell_verbs }, .num_dacs = ARRAY_SIZE(alc268_dac_nids), @@ -596,8 +595,8 @@ static const struct alc_config_preset alc268_presets[] = { .init_hook = alc_inithook, }, [ALC268_ZEPTO] = { - .mixers = { alc268_base_mixer, alc268_capture_alt_mixer, - alc268_beep_mixer }, + .mixers = { alc268_base_mixer, alc268_beep_mixer }, + .cap_mixer = alc268_capture_alt_mixer, .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, alc268_toshiba_verbs }, .num_dacs = ARRAY_SIZE(alc268_dac_nids), @@ -616,7 +615,8 @@ static const struct alc_config_preset alc268_presets[] = { }, #ifdef CONFIG_SND_DEBUG [ALC268_TEST] = { - .mixers = { alc268_test_mixer, alc268_capture_mixer }, + .mixers = { alc268_test_mixer }, + .cap_mixer = alc268_capture_mixer, .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, alc268_volume_init_verbs, alc268_beep_init_verbs }, -- cgit v1.1 From fa71f447065f676157ba6a2c121ba419818fc559 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 8 Aug 2011 11:50:24 -0400 Subject: cifs: demote cERROR in build_path_from_dentry to cFYI Running the cthon tests on a recent kernel caused this message to pop occasionally: CIFS VFS: did not end path lookup where expected namelen is 0 Some added debugging showed that namelen and dfsplen were both 0 when this occurred. That means that the read_seqretry returned true. Assuming that the comment inside the if statement is true, this should be harmless and just means that we raced with a rename. If that is the case, then there's no need for alarm and we can demote this to cFYI. While we're at it, print the dfsplen too so that we can see what happened here if the message pops during debugging. Cc: stable@kernel.org Cc: Al Viro Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index ae576fb..72d448b 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -105,8 +105,8 @@ cifs_bp_rename_retry: } rcu_read_unlock(); if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) { - cERROR(1, "did not end path lookup where expected namelen is %d", - namelen); + cFYI(1, "did not end path lookup where expected. namelen=%d " + "dfsplen=%d", namelen, dfsplen); /* presumably this is only possible if racing with a rename of one of the parent directories (we can not lock the dentries above us to prevent this, but retrying should be harmless) */ -- cgit v1.1 From 22cfb0bf6721bb1f865f67bc21e3c36c272faf36 Mon Sep 17 00:00:00 2001 From: Bernd Schubert Date: Tue, 16 Aug 2011 10:56:54 +0000 Subject: IPoIB: Fix possible NULL dereference in ipoib_start_xmit() Fix a bug introduced in 69cce1d14049 ("net: Abstract dst->neighbour accesses behind helpers.") where we might dereference skb_dst(skb) even if it is NULL, which causes: [ 240.944030] BUG: unable to handle kernel NULL pointer dereference at 0000000000000040 [ 240.948007] IP: [] ipoib_start_xmit+0x39/0x280 [ib_ipoib] [...] [ 240.948007] Call Trace: [ 240.948007] [ 240.948007] [] dev_hard_start_xmit+0x2a0/0x590 [ 240.948007] [] ? arp_create+0x70/0x200 [ 240.948007] [] sch_direct_xmit+0xef/0x1c0 Addresses: https://bugzilla.kernel.org/show_bug.cgi?id=41212 Signed-off-by: Bernd Schubert Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 43f89ba..fe89c46 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -717,11 +717,13 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_neigh *neigh; - struct neighbour *n; + struct neighbour *n = NULL; unsigned long flags; - n = dst_get_neighbour(skb_dst(skb)); - if (likely(skb_dst(skb) && n)) { + if (likely(skb_dst(skb))) + n = dst_get_neighbour(skb_dst(skb)); + + if (likely(n)) { if (unlikely(!*to_ipoib_neigh(n))) { ipoib_path_lookup(skb, dev); return NETDEV_TX_OK; -- cgit v1.1 From c2bceb3d7f145af5a0916bea700f2f9d380901ea Mon Sep 17 00:00:00 2001 From: Lionel Elie Mamane Date: Sat, 13 Aug 2011 14:04:38 +0000 Subject: sit tunnels: propagate IPv6 transport class to IPv4 Type of Service sit tunnels (IPv6 tunnel over IPv4) do not implement the "tos inherit" case to copy the IPv6 transport class byte from the inner packet to the IPv4 type of service byte in the outer packet. By contrast, ipip tunnels and GRE tunnels do. This patch, adapted from the similar code in net/ipv4/ipip.c and net/ipv4/ip_gre.c, implements that. This patch applies to 3.0.1, and has been tested on that version. Signed-off-by: Lionel Elie Mamane Signed-off-by: David S. Miller --- net/ipv6/sit.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 07bf108..00b15ac 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -672,6 +672,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (skb->protocol != htons(ETH_P_IPV6)) goto tx_error; + if (tos == 1) + tos = ipv6_get_dsfield(iph6); + /* ISATAP (RFC4214) - must come before 6to4 */ if (dev->priv_flags & IFF_ISATAP) { struct neighbour *neigh = NULL; -- cgit v1.1 From 48df4a6fd8c40c0bbcbca2044f5f2bc75dcf6db1 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Fri, 12 Aug 2011 10:23:01 -0700 Subject: xhci: Handle zero-length isochronous packets. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For a long time, the xHCI driver has had this note: /* FIXME: Ignoring zero-length packets, can those happen? */ It turns out that, yes, there are drivers that need to queue zero-length transfers for isochronous OUT transfers. Without this patch, users will see kernel hang messages when a driver attempts to enqueue an isochronous URB with a zero length transfer (because count_isoc_trbs_needed will return zero for that TD, xhci_td->last_trb will never be set, and updating the dequeue pointer will cause an infinite loop). Matěj ran into this issue when using an NI Audio4DJ USB soundcard with the snd-usb-caiaq driver. See https://bugzilla.kernel.org/show_bug.cgi?id=40702 Fix count_isoc_trbs_needed() to return 1 for zero-length transfers (thanks Alan on the math help). Update the various TRB field calculations to deal with zero-length transfers. We're still transferring one packet with a zero-length data payload, so the total_packet_count should be 1. The Transfer Burst Count (TBC) and Transfer Last Burst Packet Count (TLBPC) fields should be set to zero. This patch should be backported to kernels as old as 2.6.36. Signed-off-by: Sarah Sharp Tested-by: Matěj Laitl Cc: Daniel Mack Cc: Alan Stern Cc: stable@kernel.org --- drivers/usb/host/xhci-ring.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index b2d654b..54139a2 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2684,6 +2684,10 @@ static u32 xhci_v1_0_td_remainder(int running_total, int trb_buff_len, { int packets_transferred; + /* One TRB with a zero-length data packet. */ + if (running_total == 0 && trb_buff_len == 0) + return 0; + /* All the TRB queueing functions don't count the current TRB in * running_total. */ @@ -3125,20 +3129,15 @@ static int count_isoc_trbs_needed(struct xhci_hcd *xhci, struct urb *urb, int i) { int num_trbs = 0; - u64 addr, td_len, running_total; + u64 addr, td_len; addr = (u64) (urb->transfer_dma + urb->iso_frame_desc[i].offset); td_len = urb->iso_frame_desc[i].length; - running_total = TRB_MAX_BUFF_SIZE - (addr & (TRB_MAX_BUFF_SIZE - 1)); - running_total &= TRB_MAX_BUFF_SIZE - 1; - if (running_total != 0) - num_trbs++; - - while (running_total < td_len) { + num_trbs = DIV_ROUND_UP(td_len + (addr & (TRB_MAX_BUFF_SIZE - 1)), + TRB_MAX_BUFF_SIZE); + if (num_trbs == 0) num_trbs++; - running_total += TRB_MAX_BUFF_SIZE; - } return num_trbs; } @@ -3250,9 +3249,11 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags, addr = start_addr + urb->iso_frame_desc[i].offset; td_len = urb->iso_frame_desc[i].length; td_remain_len = td_len; - /* FIXME: Ignoring zero-length packets, can those happen? */ total_packet_count = roundup(td_len, le16_to_cpu(urb->ep->desc.wMaxPacketSize)); + /* A zero-length transfer still involves at least one packet. */ + if (total_packet_count == 0) + total_packet_count++; burst_count = xhci_get_burst_count(xhci, urb->dev, urb, total_packet_count); residue = xhci_get_last_burst_packet_count(xhci, -- cgit v1.1 From eb39d34004888afcc0a44d9c36383cd69fa3b3b9 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 26 Jul 2011 16:59:00 -0700 Subject: target: Change TCM_NON_EXISTENT_LUN response to ASC=LOGICAL UNIT NOT SUPPORTED This patch changes transport_send_check_condition_and_sense() for TCM_NON_EXISTENT_LUN emulation to use 0x25 (LOGICAL UNIT NOT SUPPORTED) instead of the original 0x20 (INVALID COMMAND OPERATION CODE). This is helpful to distinguish between TCM_UNSUPPORTED_SCSI_OPCODE ASC=0x20 exceptions. Signed-off-by: Nicholas A. Bellinger --- drivers/target/target_core_transport.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 8976032..cc5a339 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -4726,6 +4726,13 @@ int transport_send_check_condition_and_sense( */ switch (reason) { case TCM_NON_EXISTENT_LUN: + /* CURRENT ERROR */ + buffer[offset] = 0x70; + /* ILLEGAL REQUEST */ + buffer[offset+SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; + /* LOGICAL UNIT NOT SUPPORTED */ + buffer[offset+SPC_ASC_KEY_OFFSET] = 0x25; + break; case TCM_UNSUPPORTED_SCSI_OPCODE: case TCM_SECTOR_COUNT_TOO_MANY: /* CURRENT ERROR */ -- cgit v1.1 From d5e2003c2bcda93a8f2e668eb4642d70c9c38301 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 4 Aug 2011 14:52:27 +0000 Subject: Btrfs: detect wether a device supports discard We have a problem where if a user specifies discard but doesn't actually support it we will return EOPNOTSUPP from btrfs_discard_extent. This is a problem because this gets called (in a fashion) from the tree log recovery code, which has a nice little BUG_ON(ret) after it, which causes us to fail the tree log replay. So instead detect wether our devices support discard when we're adding them and then don't issue discards if we know that the device doesn't support it. And just for good measure set ret = 0 in btrfs_issue_discard just in case we still get EOPNOTSUPP so we don't screw anybody up like this again. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 12 ++++++++++-- fs/btrfs/volumes.c | 17 +++++++++++++++++ fs/btrfs/volumes.h | 2 ++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 66bac22..059dfa0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1782,6 +1782,9 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, for (i = 0; i < multi->num_stripes; i++, stripe++) { + if (!stripe->dev->can_discard) + continue; + ret = btrfs_issue_discard(stripe->dev->bdev, stripe->physical, stripe->length); @@ -1789,11 +1792,16 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, discarded_bytes += stripe->length; else if (ret != -EOPNOTSUPP) break; + + /* + * Just in case we get back EOPNOTSUPP for some reason, + * just ignore the return value so we don't screw up + * people calling discard_extent. + */ + ret = 0; } kfree(multi); } - if (discarded_bytes && ret == -EOPNOTSUPP) - ret = 0; if (actual_bytes) *actual_bytes = discarded_bytes; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3c5f2fc..a595f87 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -517,6 +517,9 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) fs_devices->rw_devices--; } + if (device->can_discard) + fs_devices->num_can_discard--; + new_device = kmalloc(sizeof(*new_device), GFP_NOFS); BUG_ON(!new_device); memcpy(new_device, device, sizeof(*new_device)); @@ -525,6 +528,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) new_device->bdev = NULL; new_device->writeable = 0; new_device->in_fs_metadata = 0; + new_device->can_discard = 0; list_replace_rcu(&device->dev_list, &new_device->dev_list); call_rcu(&device->rcu, free_device); @@ -564,6 +568,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, fmode_t flags, void *holder) { + struct request_queue *q; struct block_device *bdev; struct list_head *head = &fs_devices->devices; struct btrfs_device *device; @@ -620,6 +625,12 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, seeding = 0; } + q = bdev_get_queue(bdev); + if (blk_queue_discard(q)) { + device->can_discard = 1; + fs_devices->num_can_discard++; + } + device->bdev = bdev; device->in_fs_metadata = 0; device->mode = flags; @@ -1560,6 +1571,7 @@ error: int btrfs_init_new_device(struct btrfs_root *root, char *device_path) { + struct request_queue *q; struct btrfs_trans_handle *trans; struct btrfs_device *device; struct block_device *bdev; @@ -1629,6 +1641,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) lock_chunks(root); + q = bdev_get_queue(bdev); + if (blk_queue_discard(q)) + device->can_discard = 1; device->writeable = 1; device->work.func = pending_bios_fn; generate_random_uuid(device->uuid); @@ -1664,6 +1679,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) root->fs_info->fs_devices->num_devices++; root->fs_info->fs_devices->open_devices++; root->fs_info->fs_devices->rw_devices++; + if (device->can_discard) + root->fs_info->fs_devices->num_can_discard++; root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; if (!blk_queue_nonrot(bdev_get_queue(bdev))) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 7c12d61..6d866db 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -48,6 +48,7 @@ struct btrfs_device { int writeable; int in_fs_metadata; int missing; + int can_discard; spinlock_t io_lock; @@ -104,6 +105,7 @@ struct btrfs_fs_devices { u64 rw_devices; u64 missing_devices; u64 total_rw_bytes; + u64 num_can_discard; struct block_device *latest_bdev; /* all of the devices in the FS, protected by a mutex -- cgit v1.1 From 34f3e4f23ca3d259fe078f62a128d97ca83508ef Mon Sep 17 00:00:00 2001 From: liubo Date: Sat, 6 Aug 2011 08:35:23 +0000 Subject: Btrfs: fix an oops of log replay When btrfs recovers from a crash, it may hit the oops below: ------------[ cut here ]------------ kernel BUG at fs/btrfs/inode.c:4580! [...] RIP: 0010:[] [] btrfs_add_link+0x161/0x1c0 [btrfs] [...] Call Trace: [] ? btrfs_inode_ref_index+0x31/0x80 [btrfs] [] add_inode_ref+0x319/0x3f0 [btrfs] [] replay_one_buffer+0x2c7/0x390 [btrfs] [] walk_down_log_tree+0x32a/0x480 [btrfs] [] walk_log_tree+0xf5/0x240 [btrfs] [] btrfs_recover_log_trees+0x250/0x350 [btrfs] [] ? btrfs_recover_log_trees+0x350/0x350 [btrfs] [] open_ctree+0x1442/0x17d0 [btrfs] [...] This comes from that while replaying an inode ref item, we forget to check those old conflicting DIR_ITEM and DIR_INDEX items in fs/file tree, then we will come to conflict corners which lead to BUG_ON(). Signed-off-by: Liu Bo Tested-by: Andy Lutomirski Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index babee65..786639f 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -799,14 +799,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, struct extent_buffer *eb, int slot, struct btrfs_key *key) { - struct inode *dir; - int ret; struct btrfs_inode_ref *ref; + struct btrfs_dir_item *di; + struct inode *dir; struct inode *inode; - char *name; - int namelen; unsigned long ref_ptr; unsigned long ref_end; + char *name; + int namelen; + int ret; int search_done = 0; /* @@ -909,6 +910,25 @@ again: } btrfs_release_path(path); + /* look for a conflicting sequence number */ + di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), + btrfs_inode_ref_index(eb, ref), + name, namelen, 0); + if (di && !IS_ERR(di)) { + ret = drop_one_dir_item(trans, root, path, dir, di); + BUG_ON(ret); + } + btrfs_release_path(path); + + /* look for a conflicing name */ + di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), + name, namelen, 0); + if (di && !IS_ERR(di)) { + ret = drop_one_dir_item(trans, root, path, dir, di); + BUG_ON(ret); + } + btrfs_release_path(path); + insert: /* insert our name */ ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, -- cgit v1.1 From 38c01b9605923cfdff5413e0a12e58ee8d962257 Mon Sep 17 00:00:00 2001 From: liubo Date: Tue, 2 Aug 2011 02:39:03 +0000 Subject: Btrfs: fix a bug of balance on full multi-disk partitions When balancing, we'll first try to shrink devices for some space, but if it is working on a full multi-disk partition with raid protection, we may encounter a bug, that is, while shrinking, total_bytes may be less than bytes_used, and btrfs may allocate a dev extent that accesses out of device's bounds. Then we will not be able to write or read the data which stores at the end of the device, and get the followings: device fsid 0939f071-7ea3-46c8-95df-f176d773bfb6 devid 1 transid 10 /dev/sdb5 Btrfs detected SSD devices, enabling SSD mode btrfs: relocating block group 476315648 flags 9 btrfs: found 4 extents attempt to access beyond end of device sdb5: rw=145, want=546176, limit=546147 attempt to access beyond end of device sdb5: rw=145, want=546304, limit=546147 attempt to access beyond end of device sdb5: rw=145, want=546432, limit=546147 attempt to access beyond end of device sdb5: rw=145, want=546560, limit=546147 attempt to access beyond end of device Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a595f87..46f9a20 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -863,6 +863,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, max_hole_start = search_start; max_hole_size = 0; + hole_size = 0; if (search_start >= search_end) { ret = -ENOSPC; @@ -945,7 +946,14 @@ next: cond_resched(); } - hole_size = search_end- search_start; + /* + * At this point, search_start should be the end of + * allocated dev extents, and when shrinking the device, + * search_end may be smaller than search_start. + */ + if (search_end > search_start) + hole_size = search_end - search_start; + if (hole_size > max_hole_size) { max_hole_start = search_start; max_hole_size = hole_size; @@ -2447,9 +2455,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, total_avail = device->total_bytes - device->bytes_used; else total_avail = 0; - /* avail is off by max(alloc_start, 1MB), but that is the same - * for all devices, so it doesn't hurt the sorting later on - */ + + /* If there is no space on this device, skip it. */ + if (total_avail == 0) + continue; ret = find_free_dev_extent(trans, device, max_stripe_size * dev_stripes, -- cgit v1.1 From cdcb725c05fe0cb71777c66ddc2445fedbbb3c59 Mon Sep 17 00:00:00 2001 From: liubo Date: Wed, 3 Aug 2011 10:15:25 +0000 Subject: Btrfs: check if there is enough space for balancing smarter When checking if there is enough space for balancing a block group, since we do not take raid types into consideration, we do not account corrent amounts of space that we needed. This makes us do some extra work before we get ENOSPC. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 059dfa0..a3e71b5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6728,6 +6728,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) struct btrfs_space_info *space_info; struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; struct btrfs_device *device; + u64 min_free; + int index; + int dev_nr = 0; + int dev_min = 1; int full = 0; int ret = 0; @@ -6737,8 +6741,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) if (!block_group) return -1; + min_free = btrfs_block_group_used(&block_group->item); + /* no bytes used, we're good */ - if (!btrfs_block_group_used(&block_group->item)) + if (!min_free) goto out; space_info = block_group->space_info; @@ -6754,10 +6760,9 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) * all of the extents from this block group. If we can, we're good */ if ((space_info->total_bytes != block_group->key.offset) && - (space_info->bytes_used + space_info->bytes_reserved + - space_info->bytes_pinned + space_info->bytes_readonly + - btrfs_block_group_used(&block_group->item) < - space_info->total_bytes)) { + (space_info->bytes_used + space_info->bytes_reserved + + space_info->bytes_pinned + space_info->bytes_readonly + + min_free < space_info->total_bytes)) { spin_unlock(&space_info->lock); goto out; } @@ -6774,9 +6779,29 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) if (full) goto out; + /* + * index: + * 0: raid10 + * 1: raid1 + * 2: dup + * 3: raid0 + * 4: single + */ + index = get_block_group_index(block_group); + if (index == 0) { + dev_min = 4; + min_free /= 2; + } else if (index == 1) { + dev_min = 2; + } else if (index == 2) { + min_free *= 2; + } else if (index == 3) { + dev_min = fs_devices->rw_devices; + min_free /= dev_min; + } + mutex_lock(&root->fs_info->chunk_mutex); list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { - u64 min_free = btrfs_block_group_used(&block_group->item); u64 dev_offset; /* @@ -6787,7 +6812,11 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) ret = find_free_dev_extent(NULL, device, min_free, &dev_offset, NULL); if (!ret) + dev_nr++; + + if (dev_nr >= dev_min) break; + ret = -1; } } -- cgit v1.1 From cb1b69f4508a1e8c1a7907379eafceb7ae0325ef Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Tue, 9 Aug 2011 07:11:13 +0000 Subject: Btrfs: forced readonly when btrfs_drop_snapshot() fails The filesystem turns readonly instead of returning the error to the caller when detected error in btrfs_drop_snapshot(). and, because the caller doesn't check the error, the function type is changed to 'void'. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++-- fs/btrfs/extent-tree.c | 22 ++++++++++++++-------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a6263bd..8842936 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2367,8 +2367,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); -int btrfs_drop_snapshot(struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, int update_ref); +void btrfs_drop_snapshot(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, int update_ref); int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a3e71b5..80d6148 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6277,8 +6277,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, * also make sure backrefs for the shared block and all lower level * blocks are properly updated. */ -int btrfs_drop_snapshot(struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, int update_ref) +void btrfs_drop_snapshot(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, int update_ref) { struct btrfs_path *path; struct btrfs_trans_handle *trans; @@ -6291,13 +6291,16 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int level; path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + err = -ENOMEM; + goto out; + } wc = kzalloc(sizeof(*wc), GFP_NOFS); if (!wc) { btrfs_free_path(path); - return -ENOMEM; + err = -ENOMEM; + goto out; } trans = btrfs_start_transaction(tree_root, 0); @@ -6326,7 +6329,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, path->lowest_level = 0; if (ret < 0) { err = ret; - goto out; + goto out_free; } WARN_ON(ret > 0); @@ -6433,11 +6436,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root, free_extent_buffer(root->commit_root); kfree(root); } -out: +out_free: btrfs_end_transaction_throttle(trans, tree_root); kfree(wc); btrfs_free_path(path); - return err; +out: + if (err) + btrfs_std_error(root->fs_info, err); + return; } /* -- cgit v1.1 From c97c2916e25c56e878e3e94efd449e2d688fcb31 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 3 Aug 2011 08:11:41 +0000 Subject: Btrfs: use plain page_address() in header fields setget functions We've stopped using highmem for extent buffers. Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8842936..8b99c79 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1415,17 +1415,15 @@ void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ static inline u##bits btrfs_##name(struct extent_buffer *eb) \ { \ - type *p = kmap_atomic(eb->first_page, KM_USER0); \ + type *p = page_address(eb->first_page); \ u##bits res = le##bits##_to_cpu(p->member); \ - kunmap_atomic(p, KM_USER0); \ return res; \ } \ static inline void btrfs_set_##name(struct extent_buffer *eb, \ u##bits val) \ { \ - type *p = kmap_atomic(eb->first_page, KM_USER0); \ + type *p = page_address(eb->first_page); \ p->member = cpu_to_le##bits(val); \ - kunmap_atomic(p, KM_USER0); \ } #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ -- cgit v1.1 From f4ac904c411b55e58bb240f332f93db2455f0010 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 Aug 2011 14:19:00 +0000 Subject: btrfs: memory leak in btrfs_add_inode_defrag() We don't use the defrag struct on this path. Signed-off-by: Dan Carpenter Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 010aec8..0705d15 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -150,6 +150,8 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, spin_lock(&root->fs_info->defrag_inodes_lock); if (!BTRFS_I(inode)->in_defrag) __btrfs_add_inode_defrag(inode, defrag); + else + kfree(defrag); spin_unlock(&root->fs_info->defrag_inodes_lock); return 0; } -- cgit v1.1 From bb3ac5a4dfc8eeb881206c77d9f925e320d9c41a Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 5 Aug 2011 09:32:35 +0000 Subject: Btrfs: fix wrong free space information Btrfs subtracted the size of the allocated space twice when it allocated the space from the bitmap in the cluster, it broke the free space information and led to oops finally. And this patch also fixes the bug that ctl->free_space was subtracted without lock. Reported-by: Liu Bo Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/free-space-cache.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 6377713..6a265b9 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1168,9 +1168,9 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); } -static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, - struct btrfs_free_space *info, u64 offset, - u64 bytes) +static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info, + u64 offset, u64 bytes) { unsigned long start, count; @@ -1181,6 +1181,13 @@ static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, bitmap_clear(info->bitmap, start, count); info->bytes -= bytes; +} + +static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info, u64 offset, + u64 bytes) +{ + __bitmap_clear_bits(ctl, info, offset, bytes); ctl->free_space -= bytes; } @@ -1984,7 +1991,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, return 0; ret = search_start; - bitmap_clear_bits(ctl, entry, ret, bytes); + __bitmap_clear_bits(ctl, entry, ret, bytes); return ret; } @@ -2039,7 +2046,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, continue; } } else { - ret = entry->offset; entry->offset += bytes; -- cgit v1.1 From 0e588859618be54ec100373f1b86296271ce5307 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 5 Aug 2011 09:32:37 +0000 Subject: Btrfs: fix uninitialized sync_pending sync_pending is uninitialized before it be used, fix it. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 46f9a20..f2a4cc7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -142,7 +142,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) unsigned long limit; unsigned long last_waited = 0; int force_reg = 0; - int sync_pending; + int sync_pending = 0; struct blk_plug plug; /* -- cgit v1.1 From f81c9cdc567cd3160ff9e64868d9a1a7ee226480 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 10 Aug 2011 18:04:04 +0000 Subject: Btrfs: truncate pages from clone ioctl target range We need to truncate page cache pages for the clone ioctl target range or else we'll confuse ourselves to no end. If the old data was cached, we used to still see it (until remount). If the page was partially updated we used to get a mix of old and new data. Signed-off-by: Sage Weil Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2bb0886..b3d249d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2244,6 +2244,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, btrfs_wait_ordered_range(src, off, len); } + /* truncate page cache pages from target inode range */ + truncate_inode_pages_range(&inode->i_data, off, + ALIGN(off + len, PAGE_CACHE_SIZE) - 1); + /* clone data */ key.objectid = btrfs_ino(src); key.type = BTRFS_EXTENT_DATA_KEY; -- cgit v1.1 From c331eb580a0a7906c0cdb8dbae3cfe99e3c0e555 Mon Sep 17 00:00:00 2001 From: Andrew Drake Date: Tue, 16 Aug 2011 11:07:39 -0700 Subject: Input: bcm5974 - Add support for newer MacBookPro8,2 New MacBook Pro devices reporting product name MacBookPro8,2 come with newer/higher resolution touchpads than others with the same product name with USB ID 05ac:0252. This patch adds support for these devices. Signed-off-by: Andrew Drake Reviewed-by: Wanlong Gao Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/bcm5974.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c index 48d9ec1..da28018 100644 --- a/drivers/input/mouse/bcm5974.c +++ b/drivers/input/mouse/bcm5974.c @@ -71,6 +71,10 @@ #define USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI 0x024c #define USB_DEVICE_ID_APPLE_WELLSPRING6_ISO 0x024d #define USB_DEVICE_ID_APPLE_WELLSPRING6_JIS 0x024e +/* Macbook8,2 (unibody) */ +#define USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI 0x0252 +#define USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO 0x0253 +#define USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS 0x0254 #define BCM5974_DEVICE(prod) { \ .match_flags = (USB_DEVICE_ID_MATCH_DEVICE | \ @@ -112,6 +116,10 @@ static const struct usb_device_id bcm5974_table[] = { BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_JIS), + /* MacbookPro8,2 */ + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI), + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO), + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS), /* Terminating entry */ {} }; @@ -314,6 +322,18 @@ static const struct bcm5974_config bcm5974_config_table[] = { { DIM_X, DIM_X / SN_COORD, -4620, 5140 }, { DIM_Y, DIM_Y / SN_COORD, -150, 6600 } }, + { + USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI, + USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO, + USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS, + HAS_INTEGRATED_BUTTON, + 0x84, sizeof(struct bt_data), + 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, + { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 }, + { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 }, + { DIM_X, DIM_X / SN_COORD, -4750, 5280 }, + { DIM_Y, DIM_Y / SN_COORD, -150, 6730 } + }, {} }; -- cgit v1.1 From 28ac293363368650963ee4c1e323c1ff502c121f Mon Sep 17 00:00:00 2001 From: Yufeng Shen Date: Tue, 16 Aug 2011 00:40:54 -0700 Subject: Input: atmel_mxt_ts - report pressure information from the driver Atmel mxt1386 touch controller has the touch pressure information so let's report it to the user space. [dtor@mail.ru: added ABS_RESSURE reporting for ST emulation.] Signed-off-by: Yufeng Shen Acked-by: Wanlong Gao Acked-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/atmel_mxt_ts.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index ae00604..f5d6685 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -244,6 +244,7 @@ struct mxt_finger { int x; int y; int area; + int pressure; }; /* Each client has this additional data */ @@ -536,6 +537,8 @@ static void mxt_input_report(struct mxt_data *data, int single_id) finger[id].x); input_report_abs(input_dev, ABS_MT_POSITION_Y, finger[id].y); + input_report_abs(input_dev, ABS_MT_PRESSURE, + finger[id].pressure); } else { finger[id].status = 0; } @@ -546,6 +549,8 @@ static void mxt_input_report(struct mxt_data *data, int single_id) if (status != MXT_RELEASE) { input_report_abs(input_dev, ABS_X, finger[single_id].x); input_report_abs(input_dev, ABS_Y, finger[single_id].y); + input_report_abs(input_dev, + ABS_PRESSURE, finger[single_id].pressure); } input_sync(input_dev); @@ -560,6 +565,7 @@ static void mxt_input_touchevent(struct mxt_data *data, int x; int y; int area; + int pressure; /* Check the touch is present on the screen */ if (!(status & MXT_DETECT)) { @@ -584,6 +590,7 @@ static void mxt_input_touchevent(struct mxt_data *data, y = y >> 2; area = message->message[4]; + pressure = message->message[5]; dev_dbg(dev, "[%d] %s x: %d, y: %d, area: %d\n", id, status & MXT_MOVE ? "moved" : "pressed", @@ -594,6 +601,7 @@ static void mxt_input_touchevent(struct mxt_data *data, finger[id].x = x; finger[id].y = y; finger[id].area = area; + finger[id].pressure = pressure; mxt_input_report(data, id); } @@ -1116,6 +1124,8 @@ static int __devinit mxt_probe(struct i2c_client *client, 0, data->max_x, 0, 0); input_set_abs_params(input_dev, ABS_Y, 0, data->max_y, 0, 0); + input_set_abs_params(input_dev, ABS_PRESSURE, + 0, 255, 0, 0); /* For multi touch */ input_mt_init_slots(input_dev, MXT_MAX_FINGER); @@ -1125,6 +1135,8 @@ static int __devinit mxt_probe(struct i2c_client *client, 0, data->max_x, 0, 0); input_set_abs_params(input_dev, ABS_MT_POSITION_Y, 0, data->max_y, 0, 0); + input_set_abs_params(input_dev, ABS_MT_PRESSURE, + 0, 255, 0, 0); input_set_drvdata(input_dev, data); i2c_set_clientdata(client, data); -- cgit v1.1 From 25b7679136fd85b1e5197e36a0ca126163e89590 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 17 Aug 2011 09:20:01 +0200 Subject: ASoC: Fix check for symmetric rate enforcement The ASoC core tries to not enforce symmetric rates when two streams open simultaneously. It does so by checking rtd->rate being zero. This works exactly once after booting because it is not set to zero again when the streams close. Fix this by setting rtd->rate when no active stream is left. [This leads to lots of warnings about not enforcing the symmetry in some situations as there's a race in the userspace API where we know we've got two applications but don't know what rates they want to set. -- broonie ] Signed-off-by: Sascha Hauer Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index b575939..2879c88 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -290,6 +290,9 @@ static int soc_pcm_close(struct snd_pcm_substream *substream) codec_dai->active--; codec->active--; + if (!cpu_dai->active && !codec_dai->active) + rtd->rate = 0; + /* Muting the DAC suppresses artifacts caused during digital * shutdown, for example from stopping clocks. */ -- cgit v1.1 From 8c320c079cde0286d71368961231e426539868b4 Mon Sep 17 00:00:00 2001 From: Jonas Aberg Date: Wed, 17 Aug 2011 19:10:06 +0900 Subject: fat: fix build warning This fixes a compile warning (unititialized variable) in the fat filesystem code. Signed-off-by: Jonas Aberg Signed-off-by: Lee Jones Signed-off-by: Linus Walleij Signed-off-by: OGAWA Hirofumi --- fs/fat/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 4ad6473..5efbd5d 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -1231,7 +1231,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots, struct super_block *sb = dir->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); struct buffer_head *bh, *prev, *bhs[3]; /* 32*slots (672bytes) */ - struct msdos_dir_entry *de; + struct msdos_dir_entry *uninitialized_var(de); int err, free_slots, i, nr_bhs; loff_t pos, i_pos; -- cgit v1.1 From 186b53701ca5a843b07ca44a8d954dc6043c70f4 Mon Sep 17 00:00:00 2001 From: Mihai Moldovan Date: Wed, 17 Aug 2011 19:10:08 +0900 Subject: fat: fix utf8 iocharset warning message The fat_msg function already formats the given message and appends a newline to it - we don't need to do this in the passed message string as well, or will end up with a blank line printed in the kernel log ring buffer. Also change the loglevel from error to warning. Signed-off-by: Mihai Moldovan Signed-off-by: OGAWA Hirofumi --- fs/fat/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index cb8d839..52bcf58 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1186,9 +1186,9 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, out: /* UTF-8 doesn't provide FAT semantics */ if (!strcmp(opts->iocharset, "utf8")) { - fat_msg(sb, KERN_ERR, "utf8 is not a recommended IO charset" + fat_msg(sb, KERN_WARNING, "utf8 is not a recommended IO charset" " for FAT filesystems, filesystem will be " - "case sensitive!\n"); + "case sensitive!"); } /* If user doesn't specify allow_utime, it's initialized from dmask. */ -- cgit v1.1 From 710d4403a45c4040a9aa86971d50958f5ae6ed40 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 17 Aug 2011 19:10:09 +0900 Subject: fat: fat16 support maximum 4GB file/vol size as WinXP or 7. FAT16 support maximum 4GB vol/file size with 64KB cluster size. Win NT/XP/7 increased the maximum cluster size to 64KB, and file/vol size increased 4GB also. Although increasing, the file size of linux FAT is still limited at 2GB. I found that it is limited by sb->maxbytes(0x7fffffff) when partition is formatted by FAT16. sb->s_maxbytes in fill_super should be set to 0xffffffff like fat32. Signed-off-by: Namjae Jeon Signed-off-by: OGAWA Hirofumi --- fs/fat/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 52bcf58..017493b 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1365,6 +1365,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, sbi->free_clusters = -1; /* Don't know yet */ sbi->free_clus_valid = 0; sbi->prev_free = FAT_START_ENT; + sb->s_maxbytes = 0xffffffff; if (!sbi->fat_length && b->fat32_length) { struct fat_boot_fsinfo *fsinfo; @@ -1375,8 +1376,6 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, sbi->fat_length = le32_to_cpu(b->fat32_length); sbi->root_cluster = le32_to_cpu(b->root_cluster); - sb->s_maxbytes = 0xffffffff; - /* MC - if info_sector is 0, don't multiply by 0 */ sbi->fsinfo_sector = le16_to_cpu(b->info_sector); if (sbi->fsinfo_sector == 0) -- cgit v1.1 From ccbcdf7cf1b5f6c6db30d84095b9c6c53043af55 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 16 Aug 2011 15:07:41 +0100 Subject: xen/x86: replace order-based range checking of M2P table by linear one The order-based approach is not only less efficient (requiring a shift and a compare, typical generated code looking like this mov eax, [machine_to_phys_order] mov ecx, eax shr ebx, cl test ebx, ebx jnz ... whereas a direct check requires just a compare, like in cmp ebx, [machine_to_phys_nr] jae ... ), but also slightly dangerous in the 32-on-64 case - the element address calculation can wrap if the next power of two boundary is sufficiently far away from the actual upper limit of the table, and hence can result in user space addresses being accessed (with it being unknown what may actually be mapped there). Additionally, the elimination of the mistaken use of fls() here (should have been __fls()) fixes a latent issue on x86-64 that would trigger if the code was run on a system with memory extending beyond the 44-bit boundary. CC: stable@kernel.org Signed-off-by: Jan Beulich [v1: Based on Jeremy's feedback] Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/xen/page.h | 4 ++-- arch/x86/xen/enlighten.c | 4 ++-- arch/x86/xen/mmu.c | 12 ++++++++---- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 64a619d..7ff4669 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -39,7 +39,7 @@ typedef struct xpaddr { ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) extern unsigned long *machine_to_phys_mapping; -extern unsigned int machine_to_phys_order; +extern unsigned long machine_to_phys_nr; extern unsigned long get_phys_to_machine(unsigned long pfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); @@ -87,7 +87,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; - if (unlikely((mfn >> machine_to_phys_order) != 0)) { + if (unlikely(mfn >= machine_to_phys_nr)) { pfn = ~0; goto try_override; } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 974a528..b960429 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -77,8 +77,8 @@ EXPORT_SYMBOL_GPL(xen_domain_type); unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; EXPORT_SYMBOL(machine_to_phys_mapping); -unsigned int machine_to_phys_order; -EXPORT_SYMBOL(machine_to_phys_order); +unsigned long machine_to_phys_nr; +EXPORT_SYMBOL(machine_to_phys_nr); struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f987bde..24abc1f 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1713,15 +1713,19 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) void __init xen_setup_machphys_mapping(void) { struct xen_machphys_mapping mapping; - unsigned long machine_to_phys_nr_ents; if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { machine_to_phys_mapping = (unsigned long *)mapping.v_start; - machine_to_phys_nr_ents = mapping.max_mfn + 1; + machine_to_phys_nr = mapping.max_mfn + 1; } else { - machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; + machine_to_phys_nr = MACH2PHYS_NR_ENTRIES; } - machine_to_phys_order = fls(machine_to_phys_nr_ents - 1); +#ifdef CONFIG_X86_32 + if ((machine_to_phys_mapping + machine_to_phys_nr) + < machine_to_phys_mapping) + machine_to_phys_nr = (unsigned long *)NULL + - machine_to_phys_mapping; +#endif } #ifdef CONFIG_X86_64 -- cgit v1.1 From 0ace64b85ea7b90e3bffe408b9d7c3364692bfa4 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 1 Aug 2011 21:12:09 +0000 Subject: IBiser: Fix wrong mask when sizeof (dma_addr_t) > sizeof (unsigned long) The code that prepares the SG associated with SCSI command for FMR was buggy for systems with DMA addresses that don't fit in unsigned long, e.g under the 32-bit based XenServer dom0 sizeof(dma_addr_t) is 8. Fix that by casting to unsigned long long a masking constant used by the code. This resolves a crash in iser_sg_to_page_vec on this system. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 342cbc1..db6f3ce 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -89,7 +89,7 @@ } while (0) #define SHIFT_4K 12 -#define SIZE_4K (1UL << SHIFT_4K) +#define SIZE_4K (1ULL << SHIFT_4K) #define MASK_4K (~(SIZE_4K-1)) /* support up to 512KB in one RDMA */ -- cgit v1.1 From 200ae1a08bec8f3fedfcfe94c892d9a024db4e46 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 1 Aug 2011 21:14:09 +0000 Subject: IB/iser: Support iSCSI PDU padding RFC3270 mandates that iSCSI PDUs are padded to the closest integer number of four byte words. Fix the iser code to support that on both the TX/RX flows. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 10 +++++++--- drivers/infiniband/ulp/iser/iser_initiator.c | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 8db008d..9c61b9c 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -101,13 +101,17 @@ iscsi_iser_recv(struct iscsi_conn *conn, /* verify PDU length */ datalen = ntoh24(hdr->dlength); - if (datalen != rx_data_len) { - printk(KERN_ERR "iscsi_iser: datalen %d (hdr) != %d (IB) \n", - datalen, rx_data_len); + if (datalen > rx_data_len || (datalen + 4) < rx_data_len) { + iser_err("wrong datalen %d (hdr), %d (IB)\n", + datalen, rx_data_len); rc = ISCSI_ERR_DATALEN; goto error; } + if (datalen != rx_data_len) + iser_dbg("aligned datalen (%d) hdr, %d (IB)\n", + datalen, rx_data_len); + /* read AHS */ ahslen = hdr->hlength * 4; diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 5745b7f..f299de6 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -412,7 +412,7 @@ int iser_send_control(struct iscsi_conn *conn, memcpy(iser_conn->ib_conn->login_buf, task->data, task->data_count); tx_dsg->addr = iser_conn->ib_conn->login_dma; - tx_dsg->length = data_seg_len; + tx_dsg->length = task->data_count; tx_dsg->lkey = device->mr->lkey; mdesc->num_sge = 2; } -- cgit v1.1 From f991879473828f320a714e9494fb37a26ccd6b66 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 17 Aug 2011 13:45:09 +0100 Subject: mm: make HASHED_PAGE_VIRTUAL page_address' struct page argument const. Followup to 33dd4e0ec911 "mm: make some struct page's const" which missed the HASHED_PAGE_VIRTUAL case. Signed-off-by: Ian Campbell Cc: Andrea Arcangeli Cc: Rik van Riel Cc: Michel Lespinasse Cc: Mel Gorman Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hash.h | 2 +- include/linux/mm.h | 2 +- mm/highmem.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/hash.h b/include/linux/hash.h index 06d25c1..b80506b 100644 --- a/include/linux/hash.h +++ b/include/linux/hash.h @@ -63,7 +63,7 @@ static inline u32 hash_32(u32 val, unsigned int bits) return hash >> (32 - bits); } -static inline unsigned long hash_ptr(void *ptr, unsigned int bits) +static inline unsigned long hash_ptr(const void *ptr, unsigned int bits) { return hash_long((unsigned long)ptr, bits); } diff --git a/include/linux/mm.h b/include/linux/mm.h index fd599f4..c06454d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -737,7 +737,7 @@ static __always_inline void *lowmem_page_address(const struct page *page) #endif #if defined(HASHED_PAGE_VIRTUAL) -void *page_address(struct page *page); +void *page_address(const struct page *page); void set_page_address(struct page *page, void *virtual); void page_address_init(void); #endif diff --git a/mm/highmem.c b/mm/highmem.c index 693394d..5ef672c 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -326,7 +326,7 @@ static struct page_address_slot { spinlock_t lock; /* Protect this bucket's list */ } ____cacheline_aligned_in_smp page_address_htable[1< Date: Wed, 17 Aug 2011 17:40:33 +0100 Subject: mm: fix __page_to_pfn for a const struct page argument This allows the cast in lowmem_page_address (introduced as a warning fixup to 33dd4e0ec911 "mm: make some struct page's const") to be removed. Propagate const'ness to page_to_section() as well since it is required by __page_to_pfn. Signed-off-by: Ian Campbell Acked-by: Rik van Riel Cc: Andrea Arcangeli Cc: Michel Lespinasse Cc: Mel Gorman Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/memory_model.h | 4 ++-- include/linux/mm.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index fb2d63f..aea9e45 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -39,7 +39,7 @@ }) #define __page_to_pfn(pg) \ -({ struct page *__pg = (pg); \ +({ const struct page *__pg = (pg); \ struct pglist_data *__pgdat = NODE_DATA(page_to_nid(__pg)); \ (unsigned long)(__pg - __pgdat->node_mem_map) + \ __pgdat->node_start_pfn; \ @@ -57,7 +57,7 @@ * section[i].section_mem_map == mem_map's address - start_pfn; */ #define __page_to_pfn(pg) \ -({ struct page *__pg = (pg); \ +({ const struct page *__pg = (pg); \ int __sec = page_to_section(__pg); \ (unsigned long)(__pg - __section_mem_map_addr(__nr_to_section(__sec))); \ }) diff --git a/include/linux/mm.h b/include/linux/mm.h index c06454d..7438071 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -685,7 +685,7 @@ static inline void set_page_section(struct page *page, unsigned long section) page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; } -static inline unsigned long page_to_section(struct page *page) +static inline unsigned long page_to_section(const struct page *page) { return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; } @@ -720,7 +720,7 @@ static inline void set_page_links(struct page *page, enum zone_type zone, static __always_inline void *lowmem_page_address(const struct page *page) { - return __va(PFN_PHYS(page_to_pfn((struct page *)page))); + return __va(PFN_PHYS(page_to_pfn(page))); } #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) -- cgit v1.1 From 338d0f0a6fbc82407864606f5b64b75aeb3c70f2 Mon Sep 17 00:00:00 2001 From: Timo Warns Date: Wed, 17 Aug 2011 17:59:56 +0200 Subject: befs: Validate length of long symbolic links. Signed-off-by: Timo Warns Signed-off-by: Linus Torvalds --- fs/befs/linuxvfs.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 54b8c28..720d885 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -474,17 +474,22 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd) befs_data_stream *data = &befs_ino->i_data.ds; befs_off_t len = data->size; - befs_debug(sb, "Follow long symlink"); - - link = kmalloc(len, GFP_NOFS); - if (!link) { - link = ERR_PTR(-ENOMEM); - } else if (befs_read_lsymlink(sb, data, link, len) != len) { - kfree(link); - befs_error(sb, "Failed to read entire long symlink"); + if (len == 0) { + befs_error(sb, "Long symlink with illegal length"); link = ERR_PTR(-EIO); } else { - link[len - 1] = '\0'; + befs_debug(sb, "Follow long symlink"); + + link = kmalloc(len, GFP_NOFS); + if (!link) { + link = ERR_PTR(-ENOMEM); + } else if (befs_read_lsymlink(sb, data, link, len) != len) { + kfree(link); + befs_error(sb, "Failed to read entire long symlink"); + link = ERR_PTR(-EIO); + } else { + link[len - 1] = '\0'; + } } } else { link = befs_ino->i_data.symlink; -- cgit v1.1 From 8919bc13e8d92c5b082c5c0321567383a071f5bc Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 15 Aug 2011 05:25:40 +0000 Subject: net_sched: fix port mirror/redirect stats reporting When a redirected or mirrored packet is dropped by the target device we need to record statistics. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 102fc21..e051398 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -196,8 +196,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, skb2->skb_iif = skb->dev->ifindex; skb2->dev = dev; - dev_queue_xmit(skb2); - err = 0; + err = dev_queue_xmit(skb2); out: if (err) { -- cgit v1.1 From ba3211ccd043fae3713793334d64d75bd0a1d029 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Pan=28=E6=BD=98=E5=8D=AB=E5=B9=B3=29?= Date: Mon, 15 Aug 2011 15:57:35 +0000 Subject: bonding:reset backup and inactive flag of slave Eduard Sinelnikov (eduard.sinelnikov@gmail.com) found that if we change bonding mode from active backup to round robin, some slaves are still keeping "backup", and won't transmit packets. As Jay Vosburgh(fubar@us.ibm.com) pointed out that we can work around that by removing the bond_is_active_slave() check, because the "backup" flag is only meaningful for active backup mode. But if we just simply ignore the bond_is_active_slave() check, the transmission will work fine, but we can't maintain the correct value of "backup" flag for each slaves, though it is meaningless for other mode than active backup. I'd like to reset "backup" and "inactive" flag in bond_open, thus we can keep the correct value of them. As for bond_is_active_slave(), I'd like to prepare another patch to handle it. V2: Use C style comment. Move read_lock(&bond->curr_slave_lock). Replace restore with reset, for active backup mode, it means "restore", but for other modes, it means "reset". Signed-off-by: Weiping Pan Reviewed-by: WANG Cong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 38a83ac..43f2ea5 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3419,9 +3419,27 @@ static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count) static int bond_open(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); + struct slave *slave; + int i; bond->kill_timers = 0; + /* reset slave->backup and slave->inactive */ + read_lock(&bond->lock); + if (bond->slave_cnt > 0) { + read_lock(&bond->curr_slave_lock); + bond_for_each_slave(bond, slave, i) { + if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) + && (slave != bond->curr_active_slave)) { + bond_set_slave_inactive_flags(slave); + } else { + bond_set_slave_active_flags(slave); + } + } + read_unlock(&bond->curr_slave_lock); + } + read_unlock(&bond->lock); + INIT_DELAYED_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed); if (bond_is_lb(bond)) { -- cgit v1.1 From 9a75a97296c43c34add7dca8275496186e1b4ae9 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 15 Aug 2011 22:33:34 +0000 Subject: via-velocity: remove non-tagged packet filtering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's undesired to filter untagged packets at any time. So simply remove this. Reported-by: Stephan Bärwolf Tested-by: Stephan Bärwolf Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/via-velocity.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index deb1eca..7c5336c 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -515,10 +515,6 @@ static void velocity_init_cam_filter(struct velocity_info *vptr) mac_set_cam_mask(regs, vptr->mCAMmask); /* Enable VCAMs */ - - if (test_bit(0, vptr->active_vlans)) - WORD_REG_BITS_ON(MCFG_RTGOPT, ®s->MCFG); - for_each_set_bit(vid, vptr->active_vlans, VLAN_N_VID) { mac_set_vlan_cam(regs, i, (u8 *) &vid); vptr->vCAMmask[i / 8] |= 0x1 << (i % 8); -- cgit v1.1 From 9331db4f00cfee8a79d2147ac83723ef436b9759 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 17 Aug 2011 23:50:37 -0700 Subject: forcedeth: call vlan_mode only if hw supports vlans If hw does not support vlans, dont call nv_vlan_mode because it has no point. I believe that this should fix issues on older non-vlan supportive chips (like Ingo has). Reported-ty: Ingo Molnar Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/forcedeth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index e55df30..6d5fbd4 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -5615,7 +5615,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i goto out_error; } - nv_vlan_mode(dev, dev->features); + if (id->driver_data & DEV_HAS_VLAN) + nv_vlan_mode(dev, dev->features); netif_carrier_off(dev); -- cgit v1.1 From 3fe45aeaf2033c9eaa5028ed5ba68b466008876f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 18 Aug 2011 15:13:17 +0200 Subject: ALSA: hda - Add "PCM" volume to vmaster slave list The new parser may use "PCM" volume, but it was missing the vmaster slave list, thus "Master" volume didn't control it. Reference: https://bugzilla.kernel.org/show_bug.cgi?id=41342 Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9a1aa09..fcb11af 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1784,6 +1784,7 @@ static const char * const alc_slave_vols[] = { "Speaker Playback Volume", "Mono Playback Volume", "Line-Out Playback Volume", + "PCM Playback Volume", NULL, }; @@ -1798,6 +1799,7 @@ static const char * const alc_slave_sws[] = { "Mono Playback Switch", "IEC958 Playback Switch", "Line-Out Playback Switch", + "PCM Playback Switch", NULL, }; -- cgit v1.1 From cb6db4e57632ba8589cc2f9fe1d0aa9116b87ab8 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Mon, 15 Aug 2011 17:27:21 +0000 Subject: btrfs: btrfs_permission's RO check shouldn't apply to device nodes This patch tightens the read-only access checks in btrfs_permission to match the constraints in inode_permission. Currently, even though the device node itself will be unmodified, read-write access to device nodes is denied to when the device node resides on a read-only subvolume or a is a file that has been marked read-only by the btrfs conversion utility. With this patch applied, the check only affects regular files, directories, and symlinks. It also restructures the code a bit so that we don't duplicate the MAY_WRITE check for both tests. Signed-off-by: Jeff Mahoney Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 15fceef..0ccc743 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7354,11 +7354,15 @@ static int btrfs_set_page_dirty(struct page *page) static int btrfs_permission(struct inode *inode, int mask) { struct btrfs_root *root = BTRFS_I(inode)->root; + umode_t mode = inode->i_mode; - if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) - return -EROFS; - if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) - return -EACCES; + if (mask & MAY_WRITE && + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) { + if (btrfs_root_readonly(root)) + return -EROFS; + if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) + return -EACCES; + } return generic_permission(inode, mask); } -- cgit v1.1 From 9a4327ca1f45f82edad7dc0a4e52ce9316e0950c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 18 Aug 2011 10:16:05 -0400 Subject: btrfs: unlock on error in btrfs_file_llseek() There were some unlocks on error missing in a recent patch to btrfs_file_llseek(). Signed-off-by: Dan Carpenter Signed-off-by: Chris Mason --- fs/btrfs/file.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 658d669..f7d9df7 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1804,10 +1804,14 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin) } } - if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) - return -EINVAL; - if (offset > inode->i_sb->s_maxbytes) - return -EINVAL; + if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) { + ret = -EINVAL; + goto out; + } + if (offset > inode->i_sb->s_maxbytes) { + ret = -EINVAL; + goto out; + } /* Special lock needed here? */ if (offset != file->f_pos) { -- cgit v1.1 From f1e490a7ebe41e06324abbbcd86005b0af02a375 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 18 Aug 2011 10:36:39 -0400 Subject: Btrfs: set i_size properly when fallocating and we already xfstests exposed a problem with preallocate when it fallocates a range that already has an extent. We don't set the new i_size properly because we see that we already have an extent. This isn't right and we should update i_size if the space already exists. With this patch we now pass xfstests 075. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/file.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0705d15..15e5a1c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1631,11 +1631,15 @@ static long btrfs_fallocate(struct file *file, int mode, cur_offset = alloc_start; while (1) { + u64 actual_end; + em = btrfs_get_extent(inode, NULL, 0, cur_offset, alloc_end - cur_offset, 0); BUG_ON(IS_ERR_OR_NULL(em)); last_byte = min(extent_map_end(em), alloc_end); + actual_end = min_t(u64, extent_map_end(em), offset + len); last_byte = (last_byte + mask) & ~mask; + if (em->block_start == EXTENT_MAP_HOLE || (cur_offset >= inode->i_size && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { @@ -1648,6 +1652,16 @@ static long btrfs_fallocate(struct file *file, int mode, free_extent_map(em); break; } + } else if (actual_end > inode->i_size && + !(mode & FALLOC_FL_KEEP_SIZE)) { + /* + * We didn't need to allocate any more space, but we + * still extended the size of the file so we need to + * update i_size. + */ + inode->i_ctime = CURRENT_TIME; + i_size_write(inode, actual_end); + btrfs_ordered_update_i_size(inode, actual_end, NULL); } free_extent_map(em); -- cgit v1.1 From e574044acbad7421879270a80acd337459c94cc8 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 18 Aug 2011 15:02:47 +0300 Subject: ASoC: omap: Fix build errors in ams-delta Fix "error: too few arguments to function 'ams_delta_set_bias_level'" build errors in ams-delta.c that were introduced after commit d4c6005 ("ASoC: Add context parameter to card DAPM callbacks") by adding dapm context to ams_delta_set_bias_level calls. Signed-off-by: Jarkko Nikula Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/omap/ams-delta.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/omap/ams-delta.c b/sound/soc/omap/ams-delta.c index 30fe0d0..0aa475f 100644 --- a/sound/soc/omap/ams-delta.c +++ b/sound/soc/omap/ams-delta.c @@ -514,7 +514,7 @@ static int ams_delta_cx20442_init(struct snd_soc_pcm_runtime *rtd) } /* Set codec bias level */ - ams_delta_set_bias_level(card, SND_SOC_BIAS_STANDBY); + ams_delta_set_bias_level(card, dapm, SND_SOC_BIAS_STANDBY); /* Add hook switch - can be used to control the codec from userspace * even if line discipline fails */ @@ -649,7 +649,9 @@ static void __exit ams_delta_module_exit(void) ams_delta_hook_switch_gpios); /* Keep modem power on */ - ams_delta_set_bias_level(&ams_delta_audio_card, SND_SOC_BIAS_STANDBY); + ams_delta_set_bias_level(&ams_delta_audio_card, + &ams_delta_audio_card.rtd[0].codec->dapm, + SND_SOC_BIAS_STANDBY); platform_device_unregister(cx20442_platform_device); platform_device_unregister(ams_delta_audio_platform_device); -- cgit v1.1 From 13589c437daf4c8e429b3236c0b923de1c9420d8 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 18 Aug 2011 04:41:55 +0000 Subject: [CIFS] possible memory corruption on mount CIFS cleanup_volume_info_contents() looks like having a memory corruption problem. When UNCip is set to "&vol->UNC[2]" in cifs_parse_mount_options(), it should not be kfree()-ed in cleanup_volume_info_contents(). Introduced in commit b946845a9dc523c759cae2b6a0f6827486c3221a Signed-off-by: J.R. Okajima Reviewed-by: Jeff Layton CC: Stable Signed-off-by: Steve French --- fs/cifs/connect.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 80c2e3a..633c246 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2878,7 +2878,8 @@ cleanup_volume_info_contents(struct smb_vol *volume_info) kfree(volume_info->username); kzfree(volume_info->password); kfree(volume_info->UNC); - kfree(volume_info->UNCip); + if (volume_info->UNCip != volume_info->UNC + 2) + kfree(volume_info->UNCip); kfree(volume_info->domainname); kfree(volume_info->iocharset); kfree(volume_info->prepath); -- cgit v1.1 From 04c05b4a68c0ab0d6bb41c710a646e56f62a70a3 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 18 Aug 2011 04:44:35 +0000 Subject: update cifs version to 1.75 Signed-off-by: Steve French --- fs/cifs/cifsfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index cb71dc1..95da802 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -125,5 +125,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "1.74" +#define CIFS_VERSION "1.75" #endif /* _CIFSFS_H */ -- cgit v1.1 From 8cf2d2399ab60842f55598bc1b00fd15503b9950 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 18 Aug 2011 09:17:00 +0200 Subject: i7core_edac: fixed typo in error count calculation Based on a patch from the PaX Team, found during a clang analysis pass. Signed-off-by: Mathias Krause Acked-by: Mauro Carvalho Chehab Cc: PaX Team Cc: stable@kernel.org [v2.6.35+] Signed-off-by: Linus Torvalds --- drivers/edac/i7core_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 04f1e7c..f6cf448 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1670,7 +1670,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, char *type, *optype, *err, *msg; unsigned long error = m->status & 0x1ff0000l; u32 optypenum = (m->status >> 4) & 0x07; - u32 core_err_cnt = (m->status >> 38) && 0x7fff; + u32 core_err_cnt = (m->status >> 38) & 0x7fff; u32 dimm = (m->misc >> 16) & 0x3; u32 channel = (m->misc >> 18) & 0x3; u32 syndrome = m->misc >> 32; -- cgit v1.1 From d522a0d17963e9c2e556db2cbd60c96d40505b6c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 18 Aug 2011 12:19:27 -0700 Subject: irqdesc: fix new kernel-doc warning Fix kernel-doc warning in irqdesc.c: Warning(kernel/irq/irqdesc.c:353): No description found for parameter 'owner' Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- kernel/irq/irqdesc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index cb65d03..039b889 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -344,6 +344,7 @@ EXPORT_SYMBOL_GPL(irq_free_descs); * @from: Start the search from this irq number * @cnt: Number of consecutive irqs to allocate. * @node: Preferred node on which the irq descriptor should be allocated + * @owner: Owning module (can be NULL) * * Returns the first irq number or error code */ -- cgit v1.1 From ebd1699ec5f1a6f1f2df6b48fa54bc6ff790143c Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Thu, 18 Aug 2011 23:52:36 -0400 Subject: [libata] sata_sil: fix used-uninit warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Init 'serror' to silence the following warning: drivers/ata/sata_sil.c: In function ‘sil_interrupt’: drivers/ata/sata_sil.c:453:14: warning: ‘serror’ may be used uninitialized in this function [-Wuninitialized] This is not a 'can never happen' but is nonetheless extremely unlikely. The easiest and cleanest warning fix is simply to init the var, rather than worry about marking the var uninit-ok. Signed-off-by: Jeff Garzik --- drivers/ata/sata_sil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c index 98c1d78..9dfb40b 100644 --- a/drivers/ata/sata_sil.c +++ b/drivers/ata/sata_sil.c @@ -438,7 +438,7 @@ static void sil_host_intr(struct ata_port *ap, u32 bmdma2) u8 status; if (unlikely(bmdma2 & SIL_DMA_SATA_IRQ)) { - u32 serror; + u32 serror = 0xffffffff; /* SIEN doesn't mask SATA IRQs on some 3112s. Those * controllers continue to assert IRQ as long as -- cgit v1.1 From 6d0e194d2eefcaab6dbdca1f639748660144acb5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 4 Aug 2011 11:15:07 +0200 Subject: pata_via: disable ATAPI DMA on AVERATEC 3200 On AVERATEC 3200, pata_via causes memory corruption with ATAPI DMA, which often leads to random kernel oops. The cause of the problem is not well understood yet and only small subset of machines using the controller seem affected. Blacklist ATAPI DMA on the machine. Signed-off-by: Tejun Heo Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=11426 Reported-and-tested-by: Jim Bray Cc: Alan Cox Cc: stable@kernel.org Signed-off-by: Jeff Garzik --- drivers/ata/pata_via.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c index 65e4be6..8e9f504 100644 --- a/drivers/ata/pata_via.c +++ b/drivers/ata/pata_via.c @@ -124,6 +124,17 @@ static const struct via_isa_bridge { { NULL } }; +static const struct dmi_system_id no_atapi_dma_dmi_table[] = { + { + .ident = "AVERATEC 3200", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "AVERATEC"), + DMI_MATCH(DMI_BOARD_NAME, "3200"), + }, + }, + { } +}; + struct via_port { u8 cached_device; }; @@ -355,6 +366,13 @@ static unsigned long via_mode_filter(struct ata_device *dev, unsigned long mask) mask &= ~ ATA_MASK_UDMA; } } + + if (dev->class == ATA_DEV_ATAPI && + dmi_check_system(no_atapi_dma_dmi_table)) { + ata_dev_warn(dev, "controller locks up on ATAPI DMA, forcing PIO\n"); + mask &= ATA_MASK_PIO; + } + return mask; } -- cgit v1.1 From e39c75cf3e045c2fb3988770b207dfd09c30d4ac Mon Sep 17 00:00:00 2001 From: "Arnaud Patard (Rtp)" Date: Tue, 26 Jul 2011 16:58:19 +0200 Subject: ata: Add iMX pata support Add basic support for pata on iMX. It has been tested only on imx51. SDMA support will probably be added later so this version supports only PIO. v2: - enable only when needed IORDY - use dev_get_drvdata v3: - add missing clk_put() calls - use platform_get_irq() - fix resume code to avoid disabling IORDY on resume v4: - Remove EXPERIMENTAL and switch to depends on ARCH_MXC - Use devm_kzalloc() - make clock a must-have - Use only 1 ioremap Signed-off-by: Arnaud Patard Signed-off-by: Jeff Garzik --- drivers/ata/Kconfig | 9 ++ drivers/ata/Makefile | 1 + drivers/ata/pata_imx.c | 253 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 263 insertions(+) create mode 100644 drivers/ata/pata_imx.c diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index ca3e6be..5987e0b 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -468,6 +468,15 @@ config PATA_ICSIDE interface card. This is not required for ICS partition support. If you are unsure, say N to this. +config PATA_IMX + tristate "PATA support for Freescale iMX" + depends on ARCH_MXC + help + This option enables support for the PATA host available on Freescale + iMX SoCs. + + If unsure, say N. + config PATA_IT8213 tristate "IT8213 PATA support (Experimental)" depends on PCI && EXPERIMENTAL diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 8ac64e1..9550d69 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_PATA_HPT37X) += pata_hpt37x.o obj-$(CONFIG_PATA_HPT3X2N) += pata_hpt3x2n.o obj-$(CONFIG_PATA_HPT3X3) += pata_hpt3x3.o obj-$(CONFIG_PATA_ICSIDE) += pata_icside.o +obj-$(CONFIG_PATA_IMX) += pata_imx.o obj-$(CONFIG_PATA_IT8213) += pata_it8213.o obj-$(CONFIG_PATA_IT821X) += pata_it821x.o obj-$(CONFIG_PATA_JMICRON) += pata_jmicron.o diff --git a/drivers/ata/pata_imx.c b/drivers/ata/pata_imx.c new file mode 100644 index 0000000..ca9d9ca --- /dev/null +++ b/drivers/ata/pata_imx.c @@ -0,0 +1,253 @@ +/* + * Freescale iMX PATA driver + * + * Copyright (C) 2011 Arnaud Patard + * + * Based on pata_platform - Copyright (C) 2006 - 2007 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * TODO: + * - dmaengine support + * - check if timing stuff needed + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_imx" + +#define PATA_IMX_ATA_CONTROL 0x24 +#define PATA_IMX_ATA_CTRL_FIFO_RST_B (1<<7) +#define PATA_IMX_ATA_CTRL_ATA_RST_B (1<<6) +#define PATA_IMX_ATA_CTRL_IORDY_EN (1<<0) +#define PATA_IMX_ATA_INT_EN 0x2C +#define PATA_IMX_ATA_INTR_ATA_INTRQ2 (1<<3) +#define PATA_IMX_DRIVE_DATA 0xA0 +#define PATA_IMX_DRIVE_CONTROL 0xD8 + +struct pata_imx_priv { + struct clk *clk; + /* timings/interrupt/control regs */ + u8 *host_regs; + u32 ata_ctl; +}; + +static int pata_imx_set_mode(struct ata_link *link, struct ata_device **unused) +{ + struct ata_device *dev; + struct ata_port *ap = link->ap; + struct pata_imx_priv *priv = ap->host->private_data; + u32 val; + + ata_for_each_dev(dev, link, ENABLED) { + dev->pio_mode = dev->xfer_mode = XFER_PIO_0; + dev->xfer_shift = ATA_SHIFT_PIO; + dev->flags |= ATA_DFLAG_PIO; + + val = __raw_readl(priv->host_regs + PATA_IMX_ATA_CONTROL); + if (ata_pio_need_iordy(dev)) + val |= PATA_IMX_ATA_CTRL_IORDY_EN; + else + val &= ~PATA_IMX_ATA_CTRL_IORDY_EN; + __raw_writel(val, priv->host_regs + PATA_IMX_ATA_CONTROL); + + ata_dev_printk(dev, KERN_INFO, "configured for PIO\n"); + } + return 0; +} + +static struct scsi_host_template pata_imx_sht = { + ATA_PIO_SHT(DRV_NAME), +}; + +static struct ata_port_operations pata_imx_port_ops = { + .inherits = &ata_sff_port_ops, + .sff_data_xfer = ata_sff_data_xfer_noirq, + .cable_detect = ata_cable_unknown, + .set_mode = pata_imx_set_mode, +}; + +static void pata_imx_setup_port(struct ata_ioports *ioaddr) +{ + /* Fixup the port shift for platforms that need it */ + ioaddr->data_addr = ioaddr->cmd_addr + (ATA_REG_DATA << 2); + ioaddr->error_addr = ioaddr->cmd_addr + (ATA_REG_ERR << 2); + ioaddr->feature_addr = ioaddr->cmd_addr + (ATA_REG_FEATURE << 2); + ioaddr->nsect_addr = ioaddr->cmd_addr + (ATA_REG_NSECT << 2); + ioaddr->lbal_addr = ioaddr->cmd_addr + (ATA_REG_LBAL << 2); + ioaddr->lbam_addr = ioaddr->cmd_addr + (ATA_REG_LBAM << 2); + ioaddr->lbah_addr = ioaddr->cmd_addr + (ATA_REG_LBAH << 2); + ioaddr->device_addr = ioaddr->cmd_addr + (ATA_REG_DEVICE << 2); + ioaddr->status_addr = ioaddr->cmd_addr + (ATA_REG_STATUS << 2); + ioaddr->command_addr = ioaddr->cmd_addr + (ATA_REG_CMD << 2); +} + +static int __devinit pata_imx_probe(struct platform_device *pdev) +{ + struct ata_host *host; + struct ata_port *ap; + struct pata_imx_priv *priv; + int irq = 0; + struct resource *io_res; + + io_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (io_res == NULL) + return -EINVAL; + + irq = platform_get_irq(pdev, 0); + if (irq <= 0) + return -EINVAL; + + priv = devm_kzalloc(&pdev->dev, + sizeof(struct pata_imx_priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->clk = clk_get(&pdev->dev, NULL); + if (IS_ERR(priv->clk)) { + dev_err(&pdev->dev, "Failed to get clock\n"); + return PTR_ERR(priv->clk); + } + + clk_enable(priv->clk); + + host = ata_host_alloc(&pdev->dev, 1); + if (!host) + goto free_priv; + + host->private_data = priv; + ap = host->ports[0]; + + ap->ops = &pata_imx_port_ops; + ap->pio_mask = ATA_PIO0; + ap->flags |= ATA_FLAG_SLAVE_POSS; + + priv->host_regs = devm_ioremap(&pdev->dev, io_res->start, + resource_size(io_res)); + if (!priv->host_regs) { + dev_err(&pdev->dev, "failed to map IO/CTL base\n"); + goto free_priv; + } + + ap->ioaddr.cmd_addr = priv->host_regs + PATA_IMX_DRIVE_DATA; + ap->ioaddr.ctl_addr = priv->host_regs + PATA_IMX_DRIVE_CONTROL; + + ap->ioaddr.altstatus_addr = ap->ioaddr.ctl_addr; + + pata_imx_setup_port(&ap->ioaddr); + + ata_port_desc(ap, "cmd 0x%llx ctl 0x%llx", + (unsigned long long)io_res->start + PATA_IMX_DRIVE_DATA, + (unsigned long long)io_res->start + PATA_IMX_DRIVE_CONTROL); + + /* deassert resets */ + __raw_writel(PATA_IMX_ATA_CTRL_FIFO_RST_B | + PATA_IMX_ATA_CTRL_ATA_RST_B, + priv->host_regs + PATA_IMX_ATA_CONTROL); + /* enable interrupts */ + __raw_writel(PATA_IMX_ATA_INTR_ATA_INTRQ2, + priv->host_regs + PATA_IMX_ATA_INT_EN); + + /* activate */ + return ata_host_activate(host, irq, ata_sff_interrupt, 0, + &pata_imx_sht); + +free_priv: + clk_disable(priv->clk); + clk_put(priv->clk); + return -ENOMEM; +} + +static int __devexit pata_imx_remove(struct platform_device *pdev) +{ + struct ata_host *host = dev_get_drvdata(&pdev->dev); + struct pata_imx_priv *priv = host->private_data; + + ata_host_detach(host); + + __raw_writel(0, priv->host_regs + PATA_IMX_ATA_INT_EN); + + clk_disable(priv->clk); + clk_put(priv->clk); + + return 0; +} + +#ifdef CONFIG_PM +static int pata_imx_suspend(struct device *dev) +{ + struct ata_host *host = dev_get_drvdata(dev); + struct pata_imx_priv *priv = host->private_data; + int ret; + + ret = ata_host_suspend(host, PMSG_SUSPEND); + if (!ret) { + __raw_writel(0, priv->host_regs + PATA_IMX_ATA_INT_EN); + priv->ata_ctl = + __raw_readl(priv->host_regs + PATA_IMX_ATA_CONTROL); + clk_disable(priv->clk); + } + + return ret; +} + +static int pata_imx_resume(struct device *dev) +{ + struct ata_host *host = dev_get_drvdata(dev); + struct pata_imx_priv *priv = host->private_data; + + clk_enable(priv->clk); + + __raw_writel(priv->ata_ctl, priv->host_regs + PATA_IMX_ATA_CONTROL); + + __raw_writel(PATA_IMX_ATA_INTR_ATA_INTRQ2, + priv->host_regs + PATA_IMX_ATA_INT_EN); + + ata_host_resume(host); + + return 0; +} + +static const struct dev_pm_ops pata_imx_pm_ops = { + .suspend = pata_imx_suspend, + .resume = pata_imx_resume, +}; +#endif + +static struct platform_driver pata_imx_driver = { + .probe = pata_imx_probe, + .remove = __devexit_p(pata_imx_remove), + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, +#ifdef CONFIG_PM + .pm = &pata_imx_pm_ops, +#endif + }, +}; + +static int __init pata_imx_init(void) +{ + return platform_driver_register(&pata_imx_driver); +} + +static void __exit pata_imx_exit(void) +{ + platform_driver_unregister(&pata_imx_driver); +} +module_init(pata_imx_init); +module_exit(pata_imx_exit); + +MODULE_AUTHOR("Arnaud Patard "); +MODULE_DESCRIPTION("low-level driver for iMX PATA"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:" DRV_NAME); -- cgit v1.1 From a081da630d64acf132b2db1043c586b993d49da7 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 8 Aug 2011 13:17:57 +0200 Subject: drivers/ata/sata_dwc_460ex.c: add missing kfree Currently, error handling code in this function calls the function sata_dwc_port_stop, but this function has essentially no effect if hsdevp has not been stored in ap, which is the case throughout this function. The only effect is to print a debugging message including ap->print_id. The code is rewritten to not call sata_dwc_port_stop, but instead to jump to a local label that prints the original error message and the print_id information. In the case where hsdevp has been already allocated (but not yet stored in ap), this value is freed as well. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @exists@ local idexpression x; statement S,S1; expression E; identifier fl; expression *ptr != NULL; @@ x = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S <... when != x when != if (...) { <+...kfree(x)...+> } when any when != true x == NULL x->fl ...> ( if (x == NULL) S1 | if (...) { ... when != x when forall ( return \(0\|<+...x...+>\|ptr\); | * return ...; ) } ) // Signed-off-by: Julia Lawall Signed-off-by: Jeff Garzik --- drivers/ata/sata_dwc_460ex.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index 0a9a774..5c42374 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -1329,7 +1329,7 @@ static int sata_dwc_port_start(struct ata_port *ap) dev_err(ap->dev, "%s: dma_alloc_coherent failed\n", __func__); err = -ENOMEM; - goto CLEANUP; + goto CLEANUP_ALLOC; } } @@ -1349,15 +1349,13 @@ static int sata_dwc_port_start(struct ata_port *ap) /* Clear any error bits before libata starts issuing commands */ clear_serror(); ap->private_data = hsdevp; + dev_dbg(ap->dev, "%s: done\n", __func__); + return 0; +CLEANUP_ALLOC: + kfree(hsdevp); CLEANUP: - if (err) { - sata_dwc_port_stop(ap); - dev_dbg(ap->dev, "%s: fail\n", __func__); - } else { - dev_dbg(ap->dev, "%s: done\n", __func__); - } - + dev_dbg(ap->dev, "%s: fail. ap->id = %d\n", __func__, ap->print_id); return err; } -- cgit v1.1 From c5114cd59d2664f258b0d021d79b1532d94bdc2b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 18 Aug 2011 21:29:27 -0700 Subject: vlan: reset headers on accel emulation path It's after all necessary to do reset headers here. The reason is we cannot depend that it gets reseted in __netif_receive_skb once skb is reinjected. For incoming vlanids without vlan_dev, vlan_do_receive() returns false with skb != NULL and __netif_reveive_skb continues, skb is not reinjected. This might be good material for 3.0-stable as well Reported-by: Mike Auty Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 5f27f8e..f1f2f7b 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -167,6 +167,8 @@ struct sk_buff *vlan_untag(struct sk_buff *skb) if (unlikely(!skb)) goto err_free; + skb_reset_network_header(skb); + skb_reset_transport_header(skb); return skb; err_free: -- cgit v1.1 From 2d5b2c5ca0d3ebe707386b3add365496460cf918 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 18 Aug 2011 21:30:37 -0700 Subject: net: netdev-features.txt update to Documentation/networking/00-INDEX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update netdev-features.txt entry in 00-INDEX to incorporate feedback by Michał Mirosław. v2: restored tabs that were inadvertently changed to spaces in v1. sorry for the error. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- Documentation/networking/00-INDEX | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index 811252b..bbce121 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -135,7 +135,7 @@ multiqueue.txt netconsole.txt - The network console module netconsole.ko: configuration and notes. netdev-features.txt - - Network interface "feature mess and how to get out from it alive". + - Network interface features API description. netdevices.txt - info on network device driver functions exported to the kernel. netif-msg.txt -- cgit v1.1 From 4a0342ca8e8150bd47e7118a76e300692a1b6b7b Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 17 Aug 2011 22:14:57 +0000 Subject: sparc: fix array bounds error setting up PCIC NMI trap CC arch/sparc/kernel/pcic.o arch/sparc/kernel/pcic.c: In function 'pcic_probe': arch/sparc/kernel/pcic.c:359:33: error: array subscript is above array bounds [-Werror=array-bounds] arch/sparc/kernel/pcic.c:359:8: error: array subscript is above array bounds [-Werror=array-bounds] arch/sparc/kernel/pcic.c:360:33: error: array subscript is above array bounds [-Werror=array-bounds] arch/sparc/kernel/pcic.c:360:8: error: array subscript is above array bounds [-Werror=array-bounds] arch/sparc/kernel/pcic.c:361:33: error: array subscript is above array bounds [-Werror=array-bounds] arch/sparc/kernel/pcic.c:361:8: error: array subscript is above array bounds [-Werror=array-bounds] cc1: all warnings being treated as errors I'm not particularly familiar with sparc but t_nmi (defined in head_32.S via the TRAP_ENTRY macro) and pcic_nmi_trap_patch (defined in entry.S) both appear to be 4 instructions long and I presume from the usage that instructions are int sized. Signed-off-by: Ian Campbell Cc: "David S. Miller" Cc: sparclinux@vger.kernel.org Reviewed-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/kernel/pcic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index a19f041..1aaf8c1 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c @@ -352,8 +352,8 @@ int __init pcic_probe(void) strcpy(pbm->prom_name, namebuf); { - extern volatile int t_nmi[1]; - extern int pcic_nmi_trap_patch[1]; + extern volatile int t_nmi[4]; + extern int pcic_nmi_trap_patch[4]; t_nmi[0] = pcic_nmi_trap_patch[0]; t_nmi[1] = pcic_nmi_trap_patch[1]; -- cgit v1.1 From 38b65190c6ab0be8ce7cff69e734ca5b5e7fa309 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 19 Aug 2011 07:55:10 +0200 Subject: ALSA: usb-audio - Fix missing mixer dB information The recent fix for testing dB range at the mixer creation time seems to cause regressions in some devices. In such devices, reading the dB info at probing time gives an error, thus both dBmin and dBmax are still zero, and TLV flag isn't set although the later read of dB info succeeds. This patch adds a workaround for such a case by assuming that the later read will succeed. In future, a similar test should be performed in a case where a wrong dB range is seen even in the later read. Signed-off-by: Takashi Iwai Cc: --- sound/usb/mixer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index c04d7c7..cdd19d7 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -152,6 +152,7 @@ static inline void check_mapped_dB(const struct usbmix_name_map *p, if (p && p->dB) { cval->dBmin = p->dB->min; cval->dBmax = p->dB->max; + cval->initialized = 1; } } @@ -1092,7 +1093,7 @@ static void build_feature_ctl(struct mixer_build *state, void *raw_desc, " Switch" : " Volume"); if (control == UAC_FU_VOLUME) { check_mapped_dB(map, cval); - if (cval->dBmin < cval->dBmax) { + if (cval->dBmin < cval->dBmax || !cval->initialized) { kctl->tlv.c = mixer_vol_tlv; kctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ | -- cgit v1.1 From b53d1ed734a2b9af8da115b836b658daa7d47a48 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 19 Aug 2011 08:34:48 +0200 Subject: Revert "cfq: Remove special treatment for metadata rqs." We have a kernel build regression since 3.1-rc1, which is about 10% regression. The kernel source is in an ext3 filesystem. Alex Shi bisect it to commit: commit a07405b7802691d29ab3b23bdc76ee6d006aad0b Author: Justin TerAvest Date: Sun Jul 10 22:09:19 2011 +0200 cfq: Remove special treatment for metadata rqs. Apparently this is caused by lack metadata preemption, where ext3/ext4 do use READ_META. I didn't see a way to fix the issue, so suggest reverting the patch. This reverts commit a07405b7802691d29ab3b23bdc76ee6d006aad0b. Reported-by: Alex Shi Reported-by: Shaohua Li Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 6508345..a33bd43 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -130,6 +130,8 @@ struct cfq_queue { unsigned long slice_end; long slice_resid; + /* pending metadata requests */ + int meta_pending; /* number of requests that are on the dispatch list or inside driver */ int dispatched; @@ -682,6 +684,9 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, if (rq_is_sync(rq1) != rq_is_sync(rq2)) return rq_is_sync(rq1) ? rq1 : rq2; + if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META) + return rq1->cmd_flags & REQ_META ? rq1 : rq2; + s1 = blk_rq_pos(rq1); s2 = blk_rq_pos(rq2); @@ -1607,6 +1612,10 @@ static void cfq_remove_request(struct request *rq) cfqq->cfqd->rq_queued--; cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq), rq_is_sync(rq)); + if (rq->cmd_flags & REQ_META) { + WARN_ON(!cfqq->meta_pending); + cfqq->meta_pending--; + } } static int cfq_merge(struct request_queue *q, struct request **req, @@ -3360,6 +3369,13 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, return true; /* + * So both queues are sync. Let the new request get disk time if + * it's a metadata request and the current queue is doing regular IO. + */ + if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending) + return true; + + /* * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice. */ if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) @@ -3423,6 +3439,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_io_context *cic = RQ_CIC(rq); cfqd->rq_queued++; + if (rq->cmd_flags & REQ_META) + cfqq->meta_pending++; cfq_update_io_thinktime(cfqd, cfqq, cic); cfq_update_io_seektime(cfqd, cfqq, rq); -- cgit v1.1 From 6c58addca802950917765380257bebec0998a7da Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 17 Aug 2011 10:07:58 +0100 Subject: ARM: 7019/1: Footbridge: select CLKEVT_I8253 for ARCH_NETWINDER Since commit 8560a6cfc9818edde1fd8677961714b264ffa03d "arm: Footbridge: Use common i8253 clockevent", ARCH_NETWINDER needs to select CLKEVT_I8253. This patch fixes below build error with "make netwinder_defconfig". LD .tmp_vmlinux1 arch/arm/mach-footbridge/built-in.o: In function `isa_timer_init': isa-rtc.c:(.init.text+0x12c8): undefined reference to `clockevent_i8253_init' isa-rtc.c:(.init.text+0x12d0): undefined reference to `i8253_clockevent' arch/arm/mach-footbridge/built-in.o:(.data+0x198): undefined reference to `i8253_clockevent' make: *** [.tmp_vmlinux1] Error 1 Signed-off-by: Axel Lin Acked-by: Thomas Gleixner Signed-off-by: Russell King --- arch/arm/mach-footbridge/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-footbridge/Kconfig b/arch/arm/mach-footbridge/Kconfig index dc26fff..c8e7afc 100644 --- a/arch/arm/mach-footbridge/Kconfig +++ b/arch/arm/mach-footbridge/Kconfig @@ -62,6 +62,7 @@ config ARCH_EBSA285_HOST config ARCH_NETWINDER bool "NetWinder" select CLKSRC_I8253 + select CLKEVT_I8253 select FOOTBRIDGE_HOST select ISA select ISA_DMA -- cgit v1.1 From 98e77438aed3cd3343cbb86825127b1d9d2bea33 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Fri, 19 Aug 2011 03:19:07 -0700 Subject: ipv6: Fix ipv6_getsockopt for IPV6_2292PKTOPTIONS IPV6_2292PKTOPTIONS is broken for 32-bit applications running in COMPAT mode on 64-bit kernels. The same problem was fixed for IPv4 with the patch: ipv4: Fix ip_getsockopt for IP_PKTOPTIONS, commit dd23198e58cd35259dd09e8892bbdb90f1d57748 Signed-off-by: Sorin Dumitru Signed-off-by: Daniel Baluta Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 9cb191e..147ede38 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -913,7 +913,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, } static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) + char __user *optval, int __user *optlen, unsigned flags) { struct ipv6_pinfo *np = inet6_sk(sk); int len; @@ -962,7 +962,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, msg.msg_control = optval; msg.msg_controllen = len; - msg.msg_flags = 0; + msg.msg_flags = flags; lock_sock(sk); skb = np->pktoptions; @@ -1222,7 +1222,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, if(level != SOL_IPV6) return -ENOPROTOOPT; - err = do_ipv6_getsockopt(sk, level, optname, optval, optlen); + err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, 0); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) { @@ -1264,7 +1264,8 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, return compat_mc_getsockopt(sk, level, optname, optval, optlen, ipv6_getsockopt); - err = do_ipv6_getsockopt(sk, level, optname, optval, optlen); + err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, + MSG_CMSG_COMPAT); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) { -- cgit v1.1 From ac0d1516a2903226c19f3b4a4b323a9ffbede7d0 Mon Sep 17 00:00:00 2001 From: Kukjin Kim Date: Thu, 28 Jul 2011 08:16:34 +0900 Subject: ARM: S5P64X0: Replace irq_gc_ack() with irq_gc_ack_set_bit() According to commit 659fb32d1b67476f4ade25e9ea0e2642a5b9c4b5 ("replace irq_gc_ack() with {set,clr}_bit variants"), this should be fixed. Signed-off-by: Kukjin Kim --- arch/arm/mach-s5p64x0/irq-eint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-s5p64x0/irq-eint.c b/arch/arm/mach-s5p64x0/irq-eint.c index 69ed454..fe7380f 100644 --- a/arch/arm/mach-s5p64x0/irq-eint.c +++ b/arch/arm/mach-s5p64x0/irq-eint.c @@ -129,7 +129,7 @@ static int s5p64x0_alloc_gc(void) } ct = gc->chip_types; - ct->chip.irq_ack = irq_gc_ack; + ct->chip.irq_ack = irq_gc_ack_set_bit; ct->chip.irq_mask = irq_gc_mask_set_bit; ct->chip.irq_unmask = irq_gc_mask_clr_bit; ct->chip.irq_set_type = s5p64x0_irq_eint_set_type; -- cgit v1.1 From b8a297d3f842f4f7dae98cf85701da069204b0b1 Mon Sep 17 00:00:00 2001 From: Kukjin Kim Date: Fri, 29 Jul 2011 10:23:45 +0900 Subject: ARM: SAMSUNG: Fix Section mismatch in samsung_bl_set() WARNING: vmlinux.o(.text+0xf47c): Section mismatch in reference from the function samsung_bl_set() to the (unknown reference) .init.data:(unknown) The function samsung_bl_set() references the (unknown reference) __initdata (unknown). This is often because samsung_bl_set lacks a __initdata annotation or the annotation of (unknown) is wrong. Signed-off-by: Kukjin Kim --- arch/arm/plat-samsung/include/plat/backlight.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/plat-samsung/include/plat/backlight.h b/arch/arm/plat-samsung/include/plat/backlight.h index 51d8da8..ad530c7 100644 --- a/arch/arm/plat-samsung/include/plat/backlight.h +++ b/arch/arm/plat-samsung/include/plat/backlight.h @@ -20,7 +20,7 @@ struct samsung_bl_gpio_info { int func; }; -extern void samsung_bl_set(struct samsung_bl_gpio_info *gpio_info, +extern void __init samsung_bl_set(struct samsung_bl_gpio_info *gpio_info, struct platform_pwm_backlight_data *bl_data); #endif /* __ASM_PLAT_BACKLIGHT_H */ -- cgit v1.1 From c1a238aadf32daf23db13617fc0b401080c9ab04 Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Thu, 11 Aug 2011 16:36:41 +0900 Subject: ARM: EXYNOS4: Use the correct regulator names on universal_c210 Use the correct regulator names for cpufreq Signed-off-by: Kyungmin Park Signed-off-by: Kukjin Kim --- arch/arm/mach-exynos4/mach-universal_c210.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-exynos4/mach-universal_c210.c b/arch/arm/mach-exynos4/mach-universal_c210.c index 0e280d1..b3b5d89 100644 --- a/arch/arm/mach-exynos4/mach-universal_c210.c +++ b/arch/arm/mach-exynos4/mach-universal_c210.c @@ -79,7 +79,7 @@ static struct s3c2410_uartcfg universal_uartcfgs[] __initdata = { }; static struct regulator_consumer_supply max8952_consumer = - REGULATOR_SUPPLY("vddarm", NULL); + REGULATOR_SUPPLY("vdd_arm", NULL); static struct max8952_platform_data universal_max8952_pdata __initdata = { .gpio_vid0 = EXYNOS4_GPX0(3), @@ -105,7 +105,7 @@ static struct max8952_platform_data universal_max8952_pdata __initdata = { }; static struct regulator_consumer_supply lp3974_buck1_consumer = - REGULATOR_SUPPLY("vddint", NULL); + REGULATOR_SUPPLY("vdd_int", NULL); static struct regulator_consumer_supply lp3974_buck2_consumer = REGULATOR_SUPPLY("vddg3d", NULL); -- cgit v1.1 From af8a9f63b45758591b8412d7ae3a0585227f09a2 Mon Sep 17 00:00:00 2001 From: Jonghwan Choi Date: Fri, 12 Aug 2011 18:15:42 +0900 Subject: ARM: EXYNOS4: Fix wrong devname to support clkdev Signed-off-by: Jonghwan Choi Signed-off-by: Kukjin Kim --- arch/arm/mach-exynos4/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-exynos4/clock.c b/arch/arm/mach-exynos4/clock.c index 851dea0..1561b03 100644 --- a/arch/arm/mach-exynos4/clock.c +++ b/arch/arm/mach-exynos4/clock.c @@ -520,7 +520,7 @@ static struct clk init_clocks_off[] = { .ctrlbit = (1 << 21), }, { .name = "ac97", - .id = -1, + .devname = "samsung-ac97", .enable = exynos4_clk_ip_peril_ctrl, .ctrlbit = (1 << 27), }, { -- cgit v1.1 From 6b875cb741249ec274393ba3abb929beeb465d25 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Fri, 12 Aug 2011 18:43:57 +0900 Subject: ARM: EXYNOS4: remove duplicated inclusion Remove duplicated #include('s) in arch/arm/mach-exynos4/cpu.c Signed-off-by: Huang Weiyi Signed-off-by: Kukjin Kim --- arch/arm/mach-exynos4/cpu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/mach-exynos4/cpu.c b/arch/arm/mach-exynos4/cpu.c index 2d8a40c..84032d3 100644 --- a/arch/arm/mach-exynos4/cpu.c +++ b/arch/arm/mach-exynos4/cpu.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include -- cgit v1.1 From 5a1993f0c64f32cb4fb8a9f6caa981f377a11710 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Fri, 12 Aug 2011 19:03:16 +0900 Subject: ARM: EXYNOS4: Fix the IRQ definitions for MIPI CSIS device This is a regression fix after migration to the external GIC. The breakage has been introduced in commit 69644a8e23ab ("ARM: EXYNOS4: modify interrupt mappings for external GIC") Signed-off-by: Sylwester Nawrocki Signed-off-by: Kyungmin Park [kgene.kim@samsung.com: added commit id] Signed-off-by: Kukjin Kim --- arch/arm/mach-exynos4/include/mach/irqs.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-exynos4/include/mach/irqs.h b/arch/arm/mach-exynos4/include/mach/irqs.h index 934d2a4..f8952f8 100644 --- a/arch/arm/mach-exynos4/include/mach/irqs.h +++ b/arch/arm/mach-exynos4/include/mach/irqs.h @@ -80,9 +80,8 @@ #define IRQ_HSMMC3 IRQ_SPI(76) #define IRQ_DWMCI IRQ_SPI(77) -#define IRQ_MIPICSI0 IRQ_SPI(78) - -#define IRQ_MIPICSI1 IRQ_SPI(80) +#define IRQ_MIPI_CSIS0 IRQ_SPI(78) +#define IRQ_MIPI_CSIS1 IRQ_SPI(80) #define IRQ_ONENAND_AUDI IRQ_SPI(82) #define IRQ_ROTATOR IRQ_SPI(83) -- cgit v1.1 From 7e1291dea213c46b6649a9f6ec94b16f0d88f97c Mon Sep 17 00:00:00 2001 From: Abhilash Kesavan Date: Sat, 13 Aug 2011 10:34:56 +0900 Subject: ARM: S5PV210: Fix build warning Fixed the following warning for S5PV210. arch/arm/mach-s5pv210/pm.c: In function 's5pv210_pm_add': arch/arm/mach-s5pv210/pm.c:139: warning: assignment from incompatible pointer type Also, staticized the function. Signed-off-by: Abhilash Kesavan Signed-off-by: Kukjin Kim --- arch/arm/mach-s5pv210/pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-s5pv210/pm.c b/arch/arm/mach-s5pv210/pm.c index 309e388..f149d27 100644 --- a/arch/arm/mach-s5pv210/pm.c +++ b/arch/arm/mach-s5pv210/pm.c @@ -88,7 +88,7 @@ static struct sleep_save s5pv210_core_save[] = { SAVE_ITEM(S3C2410_TCNTO(0)), }; -void s5pv210_cpu_suspend(unsigned long arg) +static int s5pv210_cpu_suspend(unsigned long arg) { unsigned long tmp; -- cgit v1.1 From f98d429d7a7ff43d6e7c9bab239223f44a85264e Mon Sep 17 00:00:00 2001 From: Abhilash Kesavan Date: Sat, 13 Aug 2011 10:40:52 +0900 Subject: ARM: S3C64XX: Fix build break in PM debug When S3C_PM_DEBUG_LED_SMDK is enabled for suspend/resume debugging, the following compilation error occurs: arch/arm/mach-s3c64xx/pm.c: In function 's3c_pm_debug_smdkled': arch/arm/mach-s3c64xx/pm.c:41: error: implicit declaration of function 'gpio_set_value' arch/arm/mach-s3c64xx/pm.c:41: error: implicit declaration of function 'S3C64XX_GPN' arch/arm/mach-s3c64xx/pm.c: In function 's3c64xx_pm_init': arch/arm/mach-s3c64xx/pm.c:184: error: implicit declaration of function 'gpio_request' arch/arm/mach-s3c64xx/pm.c:188: error: implicit declaration of function 'gpio_direction_output' Fix the error by including linux/gpio.h Signed-off-by: Abhilash Kesavan Signed-off-by: Kukjin Kim --- arch/arm/mach-s3c64xx/pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-s3c64xx/pm.c b/arch/arm/mach-s3c64xx/pm.c index 8bad643..055e285 100644 --- a/arch/arm/mach-s3c64xx/pm.c +++ b/arch/arm/mach-s3c64xx/pm.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include -- cgit v1.1 From 995b528ad25968472742c50fe964d44fac2b857a Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 18 Aug 2011 13:02:12 +0900 Subject: ARM: SAMSUNG: Add chained enrty/exit call to timer interrupt handler This patch adds chained IRQ enter/exit functions to timer interrupt handler in order to function correctly on primary controllers with different methods of flow control. Signed-off-by: Marek Szyprowski Signed-off-by: Kyungmin Park Signed-off-by: Kukjin Kim --- arch/arm/plat-samsung/irq-vic-timer.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/plat-samsung/irq-vic-timer.c b/arch/arm/plat-samsung/irq-vic-timer.c index f714d06..51583cd 100644 --- a/arch/arm/plat-samsung/irq-vic-timer.c +++ b/arch/arm/plat-samsung/irq-vic-timer.c @@ -22,9 +22,14 @@ #include #include +#include + static void s3c_irq_demux_vic_timer(unsigned int irq, struct irq_desc *desc) { + struct irq_chip *chip = irq_get_chip(irq); + chained_irq_enter(chip, desc); generic_handle_irq((int)desc->irq_data.handler_data); + chained_irq_exit(chip, desc); } /* We assume the IRQ_TIMER0..IRQ_TIMER4 range is continuous. */ -- cgit v1.1 From 70b0e82bc7d03d33de5bceea92d419a9be4340ee Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Sat, 13 Aug 2011 12:55:36 +0900 Subject: ARM: EXYNOS4: add required chained_irq_enter/exit to eint code This patch adds chained IRQ enter/exit functions to external interrupt handler in order to function correctly on primary controllers with different methods of flow control. Signed-off-by: Marek Szyprowski Signed-off-by: Kyungmin Park Signed-off-by: Kukjin Kim --- arch/arm/mach-exynos4/irq-eint.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/mach-exynos4/irq-eint.c b/arch/arm/mach-exynos4/irq-eint.c index 9d87d2a..badb8c6 100644 --- a/arch/arm/mach-exynos4/irq-eint.c +++ b/arch/arm/mach-exynos4/irq-eint.c @@ -23,6 +23,8 @@ #include +#include + static DEFINE_SPINLOCK(eint_lock); static unsigned int eint0_15_data[16]; @@ -184,8 +186,11 @@ static inline void exynos4_irq_demux_eint(unsigned int start) static void exynos4_irq_demux_eint16_31(unsigned int irq, struct irq_desc *desc) { + struct irq_chip *chip = irq_get_chip(irq); + chained_irq_enter(chip, desc); exynos4_irq_demux_eint(IRQ_EINT(16)); exynos4_irq_demux_eint(IRQ_EINT(24)); + chained_irq_exit(chip, desc); } static void exynos4_irq_eint0_15(unsigned int irq, struct irq_desc *desc) @@ -193,6 +198,7 @@ static void exynos4_irq_eint0_15(unsigned int irq, struct irq_desc *desc) u32 *irq_data = irq_get_handler_data(irq); struct irq_chip *chip = irq_get_chip(irq); + chained_irq_enter(chip, desc); chip->irq_mask(&desc->irq_data); if (chip->irq_ack) @@ -201,6 +207,7 @@ static void exynos4_irq_eint0_15(unsigned int irq, struct irq_desc *desc) generic_handle_irq(*irq_data); chip->irq_unmask(&desc->irq_data); + chained_irq_exit(chip, desc); } int __init exynos4_init_irq_eint(void) -- cgit v1.1 From 3f6065dd9d2c947c8d68336f07bd721d3909a30d Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Sat, 13 Aug 2011 12:55:36 +0900 Subject: ARM: S5P: add required chained_irq_enter/exit to gpio-int code This patch adds chained IRQ enter/exit functions to gpio interrupt handler in order to function correctly on primary controllers with different methods of flow control. Signed-off-by: Marek Szyprowski Signed-off-by: Kyungmin Park Signed-off-by: Kukjin Kim --- arch/arm/plat-s5p/irq-gpioint.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/plat-s5p/irq-gpioint.c b/arch/arm/plat-s5p/irq-gpioint.c index 327ab9f..f71078e 100644 --- a/arch/arm/plat-s5p/irq-gpioint.c +++ b/arch/arm/plat-s5p/irq-gpioint.c @@ -23,6 +23,8 @@ #include #include +#include + #define GPIO_BASE(chip) (((unsigned long)(chip)->base) & 0xFFFFF000u) #define CON_OFFSET 0x700 @@ -81,6 +83,9 @@ static void s5p_gpioint_handler(unsigned int irq, struct irq_desc *desc) int group, pend_offset, mask_offset; unsigned int pend, mask; + struct irq_chip *chip = irq_get_chip(irq); + chained_irq_enter(chip, desc); + for (group = 0; group < bank->nr_groups; group++) { struct s3c_gpio_chip *chip = bank->chips[group]; if (!chip) @@ -102,6 +107,7 @@ static void s5p_gpioint_handler(unsigned int irq, struct irq_desc *desc) pend &= ~BIT(offset); } } + chained_irq_exit(chip, desc); } static __init int s5p_gpioint_add(struct s3c_gpio_chip *chip) -- cgit v1.1 From 2b431ff74a850db3d5b804be3ac466b6ed7f516d Mon Sep 17 00:00:00 2001 From: Yulgon Kim Date: Thu, 18 Aug 2011 20:40:24 +0900 Subject: ARM: EXYNOS4: Increase reset delay for USB HOST PHY This patch increases reset delay from 50 usec to 80 usec for USB HOST PHY. In order to reset USB HOST PHY controller properly, a little extra time is required during its reset cycle. Signed-off-by: Yulgon Kim Signed-off-by: Jingoo Han Signed-off-by: Kukjin Kim --- arch/arm/mach-exynos4/setup-usb-phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-exynos4/setup-usb-phy.c b/arch/arm/mach-exynos4/setup-usb-phy.c index 0883c1b..39aca04 100644 --- a/arch/arm/mach-exynos4/setup-usb-phy.c +++ b/arch/arm/mach-exynos4/setup-usb-phy.c @@ -82,7 +82,7 @@ static int exynos4_usb_phy1_init(struct platform_device *pdev) rstcon &= ~(HOST_LINK_PORT_SWRST_MASK | PHY1_SWRST_MASK); writel(rstcon, EXYNOS4_RSTCON); - udelay(50); + udelay(80); clk_disable(otg_clk); clk_put(otg_clk); -- cgit v1.1 From d2edddf2b25863ec0893635662b0832f9965b543 Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Fri, 19 Aug 2011 20:25:05 +0900 Subject: ARM: EXYNOS4: Add restart hook for proper reboot This is required to use SWRESET. Signed-off-by: Kyungmin Park Signed-off-by: Kukjin Kim --- arch/arm/mach-exynos4/cpu.c | 10 ++++++++++ arch/arm/mach-exynos4/include/mach/regs-pmu.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/arch/arm/mach-exynos4/cpu.c b/arch/arm/mach-exynos4/cpu.c index 84032d3..746d6fc 100644 --- a/arch/arm/mach-exynos4/cpu.c +++ b/arch/arm/mach-exynos4/cpu.c @@ -27,8 +27,10 @@ #include #include #include +#include #include +#include extern int combiner_init(unsigned int combiner_nr, void __iomem *base, unsigned int irq_start); @@ -127,6 +129,11 @@ static void exynos4_idle(void) local_irq_enable(); } +static void exynos4_sw_reset(void) +{ + __raw_writel(0x1, S5P_SWRESET); +} + /* * exynos4_map_io * @@ -240,5 +247,8 @@ int __init exynos4_init(void) /* set idle function */ pm_idle = exynos4_idle; + /* set sw_reset function */ + s5p_reset_hook = exynos4_sw_reset; + return sysdev_register(&exynos4_sysdev); } diff --git a/arch/arm/mach-exynos4/include/mach/regs-pmu.h b/arch/arm/mach-exynos4/include/mach/regs-pmu.h index fa49bbb..cdf9b47 100644 --- a/arch/arm/mach-exynos4/include/mach/regs-pmu.h +++ b/arch/arm/mach-exynos4/include/mach/regs-pmu.h @@ -29,6 +29,8 @@ #define S5P_USE_STANDBY_WFE1 (1 << 25) #define S5P_USE_MASK ((0x3 << 16) | (0x3 << 24)) +#define S5P_SWRESET S5P_PMUREG(0x0400) + #define S5P_WAKEUP_STAT S5P_PMUREG(0x0600) #define S5P_EINT_WAKEUP_MASK S5P_PMUREG(0x0604) #define S5P_WAKEUP_MASK S5P_PMUREG(0x0608) -- cgit v1.1 From 5d747c6f2c9e1615685866251416268a0f648ffc Mon Sep 17 00:00:00 2001 From: Naveen Krishna Chatradhi Date: Fri, 19 Aug 2011 20:52:29 +0900 Subject: ARM: S5P: fix bug in spdif_clk_get_rate Should be passing the parent clk object when calling for parent rate. Signed-off-by: Naveen Krishna Chatradhi Signed-off-by: Kukjin Kim --- arch/arm/plat-s5p/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/plat-s5p/clock.c b/arch/arm/plat-s5p/clock.c index 02af235..5f84a3f 100644 --- a/arch/arm/plat-s5p/clock.c +++ b/arch/arm/plat-s5p/clock.c @@ -192,7 +192,7 @@ unsigned long s5p_spdif_get_rate(struct clk *clk) if (IS_ERR(pclk)) return -EINVAL; - rate = pclk->ops->get_rate(clk); + rate = pclk->ops->get_rate(pclk); clk_put(pclk); return rate; -- cgit v1.1 From bb0822954aab7d23a3f902c2a103ee0242f6046e Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Tue, 16 Aug 2011 13:37:14 -0600 Subject: squeeze max-pause area and drop pass-good area Revert the pass-good area introduced in ffd1f609ab10 ("writeback: introduce max-pause and pass-good dirty limits") and make the max-pause area smaller and safe. This fixes ~30% performance regression in the ext3 data=writeback fio_mmap_randwrite_64k/fio_mmap_randrw_64k test cases, where there are 12 JBOD disks, on each disk runs 8 concurrent tasks doing reads+writes. Using deadline scheduler also has a regression, but not that big as CFQ, so this suggests we have some write starvation. The test logs show that - the disks are sometimes under utilized - global dirty pages sometimes rush high to the pass-good area for several hundred seconds, while in the mean time some bdi dirty pages drop to very low value (bdi_dirty << bdi_thresh). Then suddenly the global dirty pages dropped under global dirty threshold and bdi_dirty rush very high (for example, 2 times higher than bdi_thresh). During which time balance_dirty_pages() is not called at all. So the problems are 1) The random writes progress so slow that they break the assumption of the max-pause logic that "8 pages per 200ms is typically more than enough to curb heavy dirtiers". 2) The max-pause logic ignored task_bdi_thresh and thus opens the possibility for some bdi's to over dirty pages, leading to (bdi_dirty >> bdi_thresh) and then (bdi_thresh >> bdi_dirty) for others. 3) The higher max-pause/pass-good thresholds somehow leads to the bad swing of dirty pages. The fix is to allow the task to slightly dirty over task_bdi_thresh, but no way to exceed bdi_dirty and/or global dirty_thresh. Tests show that it fixed the JBOD regression completely (both behavior and performance), while still being able to cut down large pause times in balance_dirty_pages() for single-disk cases. Reported-by: Li Shaohua Tested-by: Li Shaohua Acked-by: Jan Kara Signed-off-by: Wu Fengguang --- include/linux/writeback.h | 11 ----------- mm/page-writeback.c | 15 ++------------- 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index f1bfa12e..2b8963f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -12,15 +12,6 @@ * * (thresh - thresh/DIRTY_FULL_SCOPE, thresh) * - * The 1/16 region above the global dirty limit will be put to maximum pauses: - * - * (limit, limit + limit/DIRTY_MAXPAUSE_AREA) - * - * The 1/16 region above the max-pause region, dirty exceeded bdi's will be put - * to loops: - * - * (limit + limit/DIRTY_MAXPAUSE_AREA, limit + limit/DIRTY_PASSGOOD_AREA) - * * Further beyond, all dirtier tasks will enter a loop waiting (possibly long * time) for the dirty pages to drop, unless written enough pages. * @@ -31,8 +22,6 @@ */ #define DIRTY_SCOPE 8 #define DIRTY_FULL_SCOPE (DIRTY_SCOPE / 2) -#define DIRTY_MAXPAUSE_AREA 16 -#define DIRTY_PASSGOOD_AREA 8 /* * 4MB minimal write chunk size diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d196074..0e309cd 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -754,21 +754,10 @@ static void balance_dirty_pages(struct address_space *mapping, * 200ms is typically more than enough to curb heavy dirtiers; * (b) the pause time limit makes the dirtiers more responsive. */ - if (nr_dirty < dirty_thresh + - dirty_thresh / DIRTY_MAXPAUSE_AREA && + if (nr_dirty < dirty_thresh && + bdi_dirty < (task_bdi_thresh + bdi_thresh) / 2 && time_after(jiffies, start_time + MAX_PAUSE)) break; - /* - * pass-good area. When some bdi gets blocked (eg. NFS server - * not responding), or write bandwidth dropped dramatically due - * to concurrent reads, or dirty threshold suddenly dropped and - * the dirty pages cannot be brought down anytime soon (eg. on - * slow USB stick), at least let go of the good bdi's. - */ - if (nr_dirty < dirty_thresh + - dirty_thresh / DIRTY_PASSGOOD_AREA && - bdi_dirty < bdi_thresh) - break; /* * Increase the delay for each loop, up to our previous -- cgit v1.1 From 63b37de12889b7b96463b7d6de6d3f3704486b91 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 16 Aug 2011 15:36:21 -0400 Subject: cpupower: fix Makefile typo Signed-off-by: Dave Jones Signed-off-by: Dominik Brodowski --- tools/power/cpupower/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 11521d2..edb021c 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -35,7 +35,7 @@ NLS ?= true # Set the following to 'true' to build/install the # cpufreq-bench benchmarking tool -CPUFRQ_BENCH ?= true +CPUFREQ_BENCH ?= true # Prefix to the directories we're installing to DESTDIR ?= @@ -139,7 +139,7 @@ ifeq ($(strip $(NLS)),true) COMPILE_NLS += create-gmo endif -ifeq ($(strip $(CPUFRQ_BENCH)),true) +ifeq ($(strip $(CPUFREQ_BENCH)),true) INSTALL_BENCH += install-bench COMPILE_BENCH += compile-bench endif -- cgit v1.1 From 47c336307a3680cfdf4adbe718d79f3fe66702ea Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 19 Aug 2011 17:00:02 +0200 Subject: cpupower: make NLS truly optional Loosely based on a patch for cpufrequtils, submittted by Sergey Dryabzhinsky and signed-off-by: Matt Turner Signed-off-by: Dominik Brodowski --- tools/power/cpupower/Makefile | 1 + tools/power/cpupower/utils/helpers/helpers.h | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index edb021c..e8a03ac 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -137,6 +137,7 @@ CFLAGS += -pipe ifeq ($(strip $(NLS)),true) INSTALL_NLS += install-gmo COMPILE_NLS += create-gmo + CFLAGS += -DNLS endif ifeq ($(strip $(CPUFREQ_BENCH)),true) diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 7a83022..2747e73 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -16,11 +16,20 @@ #include "helpers/bitmask.h" /* Internationalization ****************************/ +#ifdef NLS + #define _(String) gettext(String) #ifndef gettext_noop #define gettext_noop(String) String #endif #define N_(String) gettext_noop(String) + +#else /* !NLS */ + +#define _(String) String +#define N_(String) String + +#endif /* Internationalization ****************************/ extern int run_as_root; -- cgit v1.1 From 498ca793d90aef8ad38a852a969c257f62832738 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 6 Aug 2011 18:11:43 +0200 Subject: cpupower: use man(1) when calling "cpupower help subcommand" Instead of printing something non-formatted to stdout, call man(1) to show the man page for the proper subcommand. Signed-off-by: Dominik Brodowski --- tools/power/cpupower/man/cpupower-frequency-info.1 | 6 +- tools/power/cpupower/man/cpupower-frequency-set.1 | 8 +- tools/power/cpupower/man/cpupower.1 | 14 ++-- tools/power/cpupower/utils/builtin.h | 7 -- tools/power/cpupower/utils/cpufreq-info.c | 42 +--------- tools/power/cpupower/utils/cpufreq-set.c | 29 +------ tools/power/cpupower/utils/cpuidle-info.c | 24 +----- tools/power/cpupower/utils/cpupower-info.c | 20 +---- tools/power/cpupower/utils/cpupower-set.c | 25 +----- tools/power/cpupower/utils/cpupower.c | 91 ++++++++++++---------- .../cpupower/utils/idle_monitor/cpupower-monitor.c | 48 ++++-------- 11 files changed, 86 insertions(+), 228 deletions(-) diff --git a/tools/power/cpupower/man/cpupower-frequency-info.1 b/tools/power/cpupower/man/cpupower-frequency-info.1 index 3194811..bb60a8d 100644 --- a/tools/power/cpupower/man/cpupower-frequency-info.1 +++ b/tools/power/cpupower/man/cpupower-frequency-info.1 @@ -1,10 +1,10 @@ -.TH "cpufreq-info" "1" "0.1" "Mattia Dongili" "" +.TH "cpupower-frequency-info" "1" "0.1" "Mattia Dongili" "" .SH "NAME" .LP -cpufreq\-info \- Utility to retrieve cpufreq kernel information +cpupower frequency\-info \- Utility to retrieve cpufreq kernel information .SH "SYNTAX" .LP -cpufreq\-info [\fIoptions\fP] +cpupower [ \-c cpulist ] frequency\-info [\fIoptions\fP] .SH "DESCRIPTION" .LP A small tool which prints out cpufreq information helpful to developers and interested users. diff --git a/tools/power/cpupower/man/cpupower-frequency-set.1 b/tools/power/cpupower/man/cpupower-frequency-set.1 index 26e3e13..685f469 100644 --- a/tools/power/cpupower/man/cpupower-frequency-set.1 +++ b/tools/power/cpupower/man/cpupower-frequency-set.1 @@ -1,13 +1,13 @@ -.TH "cpufreq-set" "1" "0.1" "Mattia Dongili" "" +.TH "cpupower-freqency-set" "1" "0.1" "Mattia Dongili" "" .SH "NAME" .LP -cpufreq\-set \- A small tool which allows to modify cpufreq settings. +cpupower frequency\-set \- A small tool which allows to modify cpufreq settings. .SH "SYNTAX" .LP -cpufreq\-set [\fIoptions\fP] +cpupower [ \-c cpu ] frequency\-set [\fIoptions\fP] .SH "DESCRIPTION" .LP -cpufreq\-set allows you to modify cpufreq settings without having to type e.g. "/sys/devices/system/cpu/cpu0/cpufreq/scaling_set_speed" all the time. +cpupower frequency\-set allows you to modify cpufreq settings without having to type e.g. "/sys/devices/system/cpu/cpu0/cpufreq/scaling_set_speed" all the time. .SH "OPTIONS" .LP .TP diff --git a/tools/power/cpupower/man/cpupower.1 b/tools/power/cpupower/man/cpupower.1 index 78c20fe..baf741d 100644 --- a/tools/power/cpupower/man/cpupower.1 +++ b/tools/power/cpupower/man/cpupower.1 @@ -3,7 +3,7 @@ cpupower \- Shows and sets processor power related values .SH SYNOPSIS .ft B -.B cpupower [ \-c cpulist ] subcommand [ARGS] +.B cpupower [ \-c cpulist ] [ARGS] .B cpupower \-v|\-\-version @@ -13,24 +13,24 @@ cpupower \- Shows and sets processor power related values \fBcpupower \fP is a collection of tools to examine and tune power saving related features of your processor. -The manpages of the subcommands (cpupower\-(1)) provide detailed +The manpages of the commands (cpupower\-(1)) provide detailed descriptions of supported features. Run \fBcpupower help\fP to get an overview -of supported subcommands. +of supported commands. .SH Options .PP \-\-help, \-h .RS 4 -Shows supported subcommands and general usage. +Shows supported commands and general usage. .RE .PP \-\-cpu cpulist, \-c cpulist .RS 4 Only show or set values for specific cores. -This option is not supported by all subcommands, details can be found in the -manpages of the subcommands. +This option is not supported by all commands, details can be found in the +manpages of the commands. -Some subcommands access all cores (typically the *\-set commands), some only +Some commands access all cores (typically the *\-set commands), some only the first core (typically the *\-info commands) by default. The syntax for is based on how the kernel exports CPU bitmasks via diff --git a/tools/power/cpupower/utils/builtin.h b/tools/power/cpupower/utils/builtin.h index c870ffb..c10496f 100644 --- a/tools/power/cpupower/utils/builtin.h +++ b/tools/power/cpupower/utils/builtin.h @@ -8,11 +8,4 @@ extern int cmd_freq_info(int argc, const char **argv); extern int cmd_idle_info(int argc, const char **argv); extern int cmd_monitor(int argc, const char **argv); -extern void set_help(void); -extern void info_help(void); -extern void freq_set_help(void); -extern void freq_info_help(void); -extern void idle_info_help(void); -extern void monitor_help(void); - #endif diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index 5a1d25f..28953c9 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -510,37 +510,6 @@ static int get_latency(unsigned int cpu, unsigned int human) return 0; } -void freq_info_help(void) -{ - printf(_("Usage: cpupower freqinfo [options]\n")); - printf(_("Options:\n")); - printf(_(" -e, --debug Prints out debug information [default]\n")); - printf(_(" -f, --freq Get frequency the CPU currently runs at, according\n" - " to the cpufreq core *\n")); - printf(_(" -w, --hwfreq Get frequency the CPU currently runs at, by reading\n" - " it from hardware (only available to root) *\n")); - printf(_(" -l, --hwlimits Determine the minimum and maximum CPU frequency allowed *\n")); - printf(_(" -d, --driver Determines the used cpufreq kernel driver *\n")); - printf(_(" -p, --policy Gets the currently used cpufreq policy *\n")); - printf(_(" -g, --governors Determines available cpufreq governors *\n")); - printf(_(" -r, --related-cpus Determines which CPUs run at the same hardware frequency *\n")); - printf(_(" -a, --affected-cpus Determines which CPUs need to have their frequency\n" - " coordinated by software *\n")); - printf(_(" -s, --stats Shows cpufreq statistics if available\n")); - printf(_(" -y, --latency Determines the maximum latency on CPU frequency changes *\n")); - printf(_(" -b, --boost Checks for turbo or boost modes *\n")); - printf(_(" -o, --proc Prints out information like provided by the /proc/cpufreq\n" - " interface in 2.4. and early 2.6. kernels\n")); - printf(_(" -m, --human human-readable output for the -f, -w, -s and -y parameters\n")); - printf(_(" -h, --help Prints out this screen\n")); - - printf("\n"); - printf(_("If no argument is given, full output about\n" - "cpufreq is printed which is useful e.g. for reporting bugs.\n\n")); - printf(_("By default info of CPU 0 is shown which can be overridden\n" - "with the cpupower --cpu main command option.\n")); -} - static struct option info_opts[] = { { .name = "debug", .has_arg = no_argument, .flag = NULL, .val = 'e'}, { .name = "boost", .has_arg = no_argument, .flag = NULL, .val = 'b'}, @@ -556,7 +525,6 @@ static struct option info_opts[] = { { .name = "latency", .has_arg = no_argument, .flag = NULL, .val = 'y'}, { .name = "proc", .has_arg = no_argument, .flag = NULL, .val = 'o'}, { .name = "human", .has_arg = no_argument, .flag = NULL, .val = 'm'}, - { .name = "help", .has_arg = no_argument, .flag = NULL, .val = 'h'}, { }, }; @@ -570,16 +538,12 @@ int cmd_freq_info(int argc, char **argv) int output_param = 0; do { - ret = getopt_long(argc, argv, "hoefwldpgrasmyb", info_opts, NULL); + ret = getopt_long(argc, argv, "oefwldpgrasmyb", info_opts, NULL); switch (ret) { case '?': output_param = '?'; cont = 0; break; - case 'h': - output_param = 'h'; - cont = 0; - break; case -1: cont = 0; break; @@ -642,11 +606,7 @@ int cmd_freq_info(int argc, char **argv) return -EINVAL; case '?': printf(_("invalid or unknown argument\n")); - freq_info_help(); return -EINVAL; - case 'h': - freq_info_help(); - return EXIT_SUCCESS; case 'o': proc_cpufreq_output(); return EXIT_SUCCESS; diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c index 5f78362..dd1539e 100644 --- a/tools/power/cpupower/utils/cpufreq-set.c +++ b/tools/power/cpupower/utils/cpufreq-set.c @@ -20,34 +20,11 @@ #define NORM_FREQ_LEN 32 -void freq_set_help(void) -{ - printf(_("Usage: cpupower frequency-set [options]\n")); - printf(_("Options:\n")); - printf(_(" -d FREQ, --min FREQ new minimum CPU frequency the governor may select\n")); - printf(_(" -u FREQ, --max FREQ new maximum CPU frequency the governor may select\n")); - printf(_(" -g GOV, --governor GOV new cpufreq governor\n")); - printf(_(" -f FREQ, --freq FREQ specific frequency to be set. Requires userspace\n" - " governor to be available and loaded\n")); - printf(_(" -r, --related Switches all hardware-related CPUs\n")); - printf(_(" -h, --help Prints out this screen\n")); - printf("\n"); - printf(_("Notes:\n" - "1. Omitting the -c or --cpu argument is equivalent to setting it to \"all\"\n")); - printf(_("2. The -f FREQ, --freq FREQ parameter cannot be combined with any other parameter\n" - " except the -c CPU, --cpu CPU parameter\n" - "3. FREQuencies can be passed in Hz, kHz (default), MHz, GHz, or THz\n" - " by postfixing the value with the wanted unit name, without any space\n" - " (FREQuency in kHz =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000).\n")); - -} - static struct option set_opts[] = { { .name = "min", .has_arg = required_argument, .flag = NULL, .val = 'd'}, { .name = "max", .has_arg = required_argument, .flag = NULL, .val = 'u'}, { .name = "governor", .has_arg = required_argument, .flag = NULL, .val = 'g'}, { .name = "freq", .has_arg = required_argument, .flag = NULL, .val = 'f'}, - { .name = "help", .has_arg = no_argument, .flag = NULL, .val = 'h'}, { .name = "related", .has_arg = no_argument, .flag = NULL, .val='r'}, { }, }; @@ -80,7 +57,6 @@ const struct freq_units def_units[] = { static void print_unknown_arg(void) { printf(_("invalid or unknown argument\n")); - freq_set_help(); } static unsigned long string_to_frequency(const char *str) @@ -231,14 +207,11 @@ int cmd_freq_set(int argc, char **argv) /* parameter parsing */ do { - ret = getopt_long(argc, argv, "d:u:g:f:hr", set_opts, NULL); + ret = getopt_long(argc, argv, "d:u:g:f:r", set_opts, NULL); switch (ret) { case '?': print_unknown_arg(); return -EINVAL; - case 'h': - freq_set_help(); - return 0; case -1: cont = 0; break; diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c index 70da357..b028267 100644 --- a/tools/power/cpupower/utils/cpuidle-info.c +++ b/tools/power/cpupower/utils/cpuidle-info.c @@ -139,30 +139,14 @@ static void proc_cpuidle_cpu_output(unsigned int cpu) } } -/* --freq / -f */ - -void idle_info_help(void) -{ - printf(_ ("Usage: cpupower idleinfo [options]\n")); - printf(_ ("Options:\n")); - printf(_ (" -s, --silent Only show general C-state information\n")); - printf(_ (" -o, --proc Prints out information like provided by the /proc/acpi/processor/*/power\n" - " interface in older kernels\n")); - printf(_ (" -h, --help Prints out this screen\n")); - - printf("\n"); -} - static struct option info_opts[] = { { .name = "silent", .has_arg = no_argument, .flag = NULL, .val = 's'}, { .name = "proc", .has_arg = no_argument, .flag = NULL, .val = 'o'}, - { .name = "help", .has_arg = no_argument, .flag = NULL, .val = 'h'}, { }, }; static inline void cpuidle_exit(int fail) { - idle_info_help(); exit(EXIT_FAILURE); } @@ -174,7 +158,7 @@ int cmd_idle_info(int argc, char **argv) unsigned int cpu = 0; do { - ret = getopt_long(argc, argv, "hos", info_opts, NULL); + ret = getopt_long(argc, argv, "os", info_opts, NULL); if (ret == -1) break; switch (ret) { @@ -182,10 +166,6 @@ int cmd_idle_info(int argc, char **argv) output_param = '?'; cont = 0; break; - case 'h': - output_param = 'h'; - cont = 0; - break; case 's': verbose = 0; break; @@ -211,8 +191,6 @@ int cmd_idle_info(int argc, char **argv) case '?': printf(_("invalid or unknown argument\n")); cpuidle_exit(EXIT_FAILURE); - case 'h': - cpuidle_exit(EXIT_SUCCESS); } /* Default is: show output of CPU 0 only */ diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c index 85253cb..3f68632 100644 --- a/tools/power/cpupower/utils/cpupower-info.c +++ b/tools/power/cpupower/utils/cpupower-info.c @@ -16,31 +16,16 @@ #include "helpers/helpers.h" #include "helpers/sysfs.h" -void info_help(void) -{ - printf(_("Usage: cpupower info [ -b ] [ -m ] [ -s ]\n")); - printf(_("Options:\n")); - printf(_(" -b, --perf-bias Gets CPU's power vs performance policy on some\n" - " Intel models [0-15], see manpage for details\n")); - printf(_(" -m, --sched-mc Gets the kernel's multi core scheduler policy.\n")); - printf(_(" -s, --sched-smt Gets the kernel's thread sibling scheduler policy.\n")); - printf(_(" -h, --help Prints out this screen\n")); - printf(_("\nPassing no option will show all info, by default only on core 0\n")); - printf("\n"); -} - static struct option set_opts[] = { { .name = "perf-bias", .has_arg = optional_argument, .flag = NULL, .val = 'b'}, { .name = "sched-mc", .has_arg = optional_argument, .flag = NULL, .val = 'm'}, { .name = "sched-smt", .has_arg = optional_argument, .flag = NULL, .val = 's'}, - { .name = "help", .has_arg = no_argument, .flag = NULL, .val = 'h'}, { }, }; static void print_wrong_arg_exit(void) { printf(_("invalid or unknown argument\n")); - info_help(); exit(EXIT_FAILURE); } @@ -64,11 +49,8 @@ int cmd_info(int argc, char **argv) textdomain(PACKAGE); /* parameter parsing */ - while ((ret = getopt_long(argc, argv, "msbh", set_opts, NULL)) != -1) { + while ((ret = getopt_long(argc, argv, "msb", set_opts, NULL)) != -1) { switch (ret) { - case 'h': - info_help(); - return 0; case 'b': if (params.perf_bias) print_wrong_arg_exit(); diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index bc1b391..dc4de37 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c @@ -17,30 +17,16 @@ #include "helpers/sysfs.h" #include "helpers/bitmask.h" -void set_help(void) -{ - printf(_("Usage: cpupower set [ -b val ] [ -m val ] [ -s val ]\n")); - printf(_("Options:\n")); - printf(_(" -b, --perf-bias [VAL] Sets CPU's power vs performance policy on some\n" - " Intel models [0-15], see manpage for details\n")); - printf(_(" -m, --sched-mc [VAL] Sets the kernel's multi core scheduler policy.\n")); - printf(_(" -s, --sched-smt [VAL] Sets the kernel's thread sibling scheduler policy.\n")); - printf(_(" -h, --help Prints out this screen\n")); - printf("\n"); -} - static struct option set_opts[] = { { .name = "perf-bias", .has_arg = optional_argument, .flag = NULL, .val = 'b'}, { .name = "sched-mc", .has_arg = optional_argument, .flag = NULL, .val = 'm'}, { .name = "sched-smt", .has_arg = optional_argument, .flag = NULL, .val = 's'}, - { .name = "help", .has_arg = no_argument, .flag = NULL, .val = 'h'}, { }, }; static void print_wrong_arg_exit(void) { printf(_("invalid or unknown argument\n")); - set_help(); exit(EXIT_FAILURE); } @@ -66,12 +52,9 @@ int cmd_set(int argc, char **argv) params.params = 0; /* parameter parsing */ - while ((ret = getopt_long(argc, argv, "m:s:b:h", + while ((ret = getopt_long(argc, argv, "m:s:b:", set_opts, NULL)) != -1) { switch (ret) { - case 'h': - set_help(); - return 0; case 'b': if (params.perf_bias) print_wrong_arg_exit(); @@ -110,10 +93,8 @@ int cmd_set(int argc, char **argv) } }; - if (!params.params) { - set_help(); - return -EINVAL; - } + if (!params.params) + print_wrong_arg_exit(); if (params.sched_mc) { ret = sysfs_set_sched("mc", sched_mc); diff --git a/tools/power/cpupower/utils/cpupower.c b/tools/power/cpupower/utils/cpupower.c index 5844ae0..52bee59 100644 --- a/tools/power/cpupower/utils/cpupower.c +++ b/tools/power/cpupower/utils/cpupower.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "builtin.h" #include "helpers/helpers.h" @@ -19,13 +20,12 @@ struct cmd_struct { const char *cmd; int (*main)(int, const char **); - void (*usage)(void); int needs_root; }; #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) -int cmd_help(int argc, const char **argv); +static int cmd_help(int argc, const char **argv); /* Global cpu_info object available for all binaries * Info only retrieved from CPU 0 @@ -44,55 +44,66 @@ int be_verbose; static void print_help(void); static struct cmd_struct commands[] = { - { "frequency-info", cmd_freq_info, freq_info_help, 0 }, - { "frequency-set", cmd_freq_set, freq_set_help, 1 }, - { "idle-info", cmd_idle_info, idle_info_help, 0 }, - { "set", cmd_set, set_help, 1 }, - { "info", cmd_info, info_help, 0 }, - { "monitor", cmd_monitor, monitor_help, 0 }, - { "help", cmd_help, print_help, 0 }, - /* { "bench", cmd_bench, NULL, 1 }, */ + { "frequency-info", cmd_freq_info, 0 }, + { "frequency-set", cmd_freq_set, 1 }, + { "idle-info", cmd_idle_info, 0 }, + { "set", cmd_set, 1 }, + { "info", cmd_info, 0 }, + { "monitor", cmd_monitor, 0 }, + { "help", cmd_help, 0 }, + /* { "bench", cmd_bench, 1 }, */ }; -int cmd_help(int argc, const char **argv) -{ - unsigned int i; - - if (argc > 1) { - for (i = 0; i < ARRAY_SIZE(commands); i++) { - struct cmd_struct *p = commands + i; - if (strcmp(p->cmd, argv[1])) - continue; - if (p->usage) { - p->usage(); - return EXIT_SUCCESS; - } - } - } - print_help(); - if (argc == 1) - return EXIT_SUCCESS; /* cpupower help */ - return EXIT_FAILURE; -} - static void print_help(void) { unsigned int i; #ifdef DEBUG - printf(_("cpupower [ -d ][ -c cpulist ] subcommand [ARGS]\n")); - printf(_(" -d, --debug May increase output (stderr) on some subcommands\n")); + printf(_("Usage:\tcpupower [-d|--debug] [-c|--cpu cpulist ] []\n")); #else - printf(_("cpupower [ -c cpulist ] subcommand [ARGS]\n")); + printf(_("Usage:\tcpupower [-c|--cpu cpulist ] []\n")); #endif - printf(_("cpupower --version\n")); - printf(_("Supported subcommands are:\n")); + printf(_("Supported commands are:\n")); for (i = 0; i < ARRAY_SIZE(commands); i++) printf("\t%s\n", commands[i].cmd); - printf(_("\nSome subcommands can make use of the -c cpulist option.\n")); - printf(_("Look at the general cpupower manpage how to use it\n")); - printf(_("and read up the subcommand's manpage whether it is supported.\n")); - printf(_("\nUse cpupower help subcommand for getting help for above subcommands.\n")); + printf(_("\nNot all commands can make use of the -c cpulist option.\n")); + printf(_("\nUse 'cpupower help ' for getting help for above commands.\n")); +} + +static int print_man_page(const char *subpage) +{ + int len; + char *page; + + len = 10; /* enough for "cpupower-" */ + if (subpage != NULL) + len += strlen(subpage); + + page = malloc(len); + if (!page) + return -ENOMEM; + + sprintf(page, "cpupower"); + if ((subpage != NULL) && strcmp(subpage, "help")) { + strcat(page, "-"); + strcat(page, subpage); + } + + execlp("man", "man", page, NULL); + + /* should not be reached */ + return -EINVAL; +} + +static int cmd_help(int argc, const char **argv) +{ + if (argc > 1) { + print_man_page(argv[1]); /* exits within execlp() */ + return EXIT_FAILURE; + } + + print_help(); + return EXIT_SUCCESS; } static void print_version(void) diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index 6cb8d9e..0d6571e 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -43,6 +43,12 @@ static struct cpupower_topology cpu_top; /* ToDo: Document this in the manpage */ static char range_abbr[RANGE_MAX] = { 'T', 'C', 'P', 'M', }; +static void print_wrong_arg_exit(void) +{ + printf(_("invalid or unknown argument\n")); + exit(EXIT_FAILURE); +} + long long timespec_diff_us(struct timespec start, struct timespec end) { struct timespec temp; @@ -56,21 +62,6 @@ long long timespec_diff_us(struct timespec start, struct timespec end) return (temp.tv_sec * 1000000) + (temp.tv_nsec / 1000); } -void monitor_help(void) -{ - printf(_("cpupower monitor: [-m ,[],.. ] command\n")); - printf(_("cpupower monitor: [-m ,[],.. ] [ -i interval_sec ]\n")); - printf(_("cpupower monitor: -l\n")); - printf(_("\t command: pass an arbitrary command to measure specific workload\n")); - printf(_("\t -i: time intervall to measure for in seconds (default 1)\n")); - printf(_("\t -l: list available CPU sleep monitors (for use with -m)\n")); - printf(_("\t -m: show specific CPU sleep monitors only (in same order)\n")); - printf(_("\t -h: print this help\n")); - printf("\n"); - printf(_("only one of: -l, -m are allowed\nIf none of them is passed,")); - printf(_(" all supported monitors are shown\n")); -} - void print_n_spaces(int n) { int x; @@ -246,7 +237,6 @@ static void parse_monitor_param(char *param) if (hits == 0) { printf(_("No matching monitor found in %s, " "try -l option\n"), param); - monitor_help(); exit(EXIT_FAILURE); } /* Override detected/registerd monitors array with requested one */ @@ -343,37 +333,27 @@ static void cmdline(int argc, char *argv[]) int opt; progname = basename(argv[0]); - while ((opt = getopt(argc, argv, "+hli:m:")) != -1) { + while ((opt = getopt(argc, argv, "+li:m:")) != -1) { switch (opt) { - case 'h': - monitor_help(); - exit(EXIT_SUCCESS); case 'l': - if (mode) { - monitor_help(); - exit(EXIT_FAILURE); - } + if (mode) + print_wrong_arg_exit(); mode = list; break; case 'i': /* only allow -i with -m or no option */ - if (mode && mode != show) { - monitor_help(); - exit(EXIT_FAILURE); - } + if (mode && mode != show) + print_wrong_arg_exit(); interval = atoi(optarg); break; case 'm': - if (mode) { - monitor_help(); - exit(EXIT_FAILURE); - } + if (mode) + print_wrong_arg_exit(); mode = show; show_monitors_param = optarg; break; default: - monitor_help(); - exit(EXIT_FAILURE); + print_wrong_arg_exit(); } } if (!mode) -- cgit v1.1 From 69566dd8be42dea7a22f625abc96e65bb4b45d1f Mon Sep 17 00:00:00 2001 From: David Daney Date: Tue, 16 Aug 2011 11:24:37 -0700 Subject: PCI: OF: Don't crash when bridge parent is NULL. In pcibios_get_phb_of_node(), we will crash while booting if bus->bridge->parent is NULL. Check for this case and avoid dereferencing the NULL pointer. Signed-off-by: David Daney Acked-by: Benjamin Herrenschmidt Acked-by: Grant Likely Signed-off-by: Jesse Barnes --- drivers/pci/of.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/of.c b/drivers/pci/of.c index c94d37e..f092993 100644 --- a/drivers/pci/of.c +++ b/drivers/pci/of.c @@ -55,7 +55,7 @@ struct device_node * __weak pcibios_get_phb_of_node(struct pci_bus *bus) */ if (bus->bridge->of_node) return of_node_get(bus->bridge->of_node); - if (bus->bridge->parent->of_node) + if (bus->bridge->parent && bus->bridge->parent->of_node) return of_node_get(bus->bridge->parent->of_node); return NULL; } -- cgit v1.1 From 016f1c54408b1e92e2e8087bfc05ca0a9c258513 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Thu, 11 Aug 2011 12:29:46 +0200 Subject: UBIFS: not build debug messages with CONFIG_UBIFS_FS_DEBUG disabled With $ grep -e UBIFS_FS_DEBUG -e DYNAMIC_DEBUG .config # CONFIG_UBIFS_FS_DEBUG is not set CONFIG_DYNAMIC_DEBUG=y Debug messages are kept in the object files due to the dynamic_pr_debug() macro, even if they are never going to be printed: $ make fs/ubifs/super.o $ strings fs/ubifs/super.o | grep 'compiled on' compiled on: Aug 11 2011 at 12:21:38 Use plain printk to fix this. Signed-off-by: Michal Marek Signed-off-by: Artem Bityutskiy --- fs/ubifs/debug.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 45174b5..feb361e 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -335,9 +335,9 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); #define DBGKEY(key) ((char *)(key)) #define DBGKEY1(key) ((char *)(key)) -#define ubifs_dbg_msg(fmt, ...) do { \ - if (0) \ - pr_debug(fmt "\n", ##__VA_ARGS__); \ +#define ubifs_dbg_msg(fmt, ...) do { \ + if (0) \ + printk(KERN_DEBUG fmt "\n", ##__VA_ARGS__); \ } while (0) #define dbg_dump_stack() -- cgit v1.1 From 9efabc84768ee8e79b50ad6ad6cff94d66da01f7 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 19 Aug 2011 19:02:27 +0300 Subject: UBI: do not link debug messages when debugging is disabled Michal Marek spotted the same issue in UBIFS and this patch fixes UBI, see "UBIFS: not build debug messages with CONFIG_UBIFS_FS_DEBUG disabled" When UBI debugging is disabled, we have debugging messages defined as: if (0) pr_debug() But pr_debug macro defines data structures with debugging data and makes the linux binary larger, even though we have "if (0)". Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/debug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h index 65b5b76..64fbb00 100644 --- a/drivers/mtd/ubi/debug.h +++ b/drivers/mtd/ubi/debug.h @@ -181,7 +181,7 @@ static inline int ubi_dbg_is_erase_failure(const struct ubi_device *ubi) #define ubi_dbg_msg(fmt, ...) do { \ if (0) \ - pr_debug(fmt "\n", ##__VA_ARGS__); \ + printk(KERN_DEBUG fmt "\n", ##__VA_ARGS__); \ } while (0) #define dbg_msg(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__) -- cgit v1.1 From d555ab6bb321814853ca8a8d4e8e22d52e18a871 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 29 Jul 2011 21:11:43 -0700 Subject: max8998_charger: Needs module.h power/max8998_charger.c uses interfaces from linux/module.h, so it should include that file. This fixes build errors. Signed-off-by: Randy Dunlap Signed-off-by: Anton Vorontsov --- drivers/power/max8998_charger.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/max8998_charger.c b/drivers/power/max8998_charger.c index cc21fa2..ef8efad 100644 --- a/drivers/power/max8998_charger.c +++ b/drivers/power/max8998_charger.c @@ -20,6 +20,7 @@ */ #include +#include #include #include #include -- cgit v1.1 From 71aa79a8c2537eb07cd26b5e4dc43274a9c10692 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 1 Aug 2011 07:29:31 +0800 Subject: max8997_charger: Needs module.h power/max8997_charger.c uses interfaces from linux/module.h, so it should include that file. This fixes build errors. Signed-off-by: Axel Lin Acked-by: MyungJoo Ham Signed-off-by: Anton Vorontsov --- drivers/power/max8997_charger.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/max8997_charger.c b/drivers/power/max8997_charger.c index 7106b49..ffc5033 100644 --- a/drivers/power/max8997_charger.c +++ b/drivers/power/max8997_charger.c @@ -20,6 +20,7 @@ */ #include +#include #include #include #include -- cgit v1.1 From 815efa1eab5b0c3e071e5d6df0cc2d7e0c7e6fd7 Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Fri, 12 Aug 2011 17:55:18 +0300 Subject: s3c-adc-battery: Fix compilation error due to missing header (module.h) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add linux/module.h to fix this compilation error: drivers/power/s3c_adc_battery.c:435:15: error: expected declaration specifiers or ‘...’ before string constant drivers/power/s3c_adc_battery.c:435:1: warning: data definition has no type or storage class drivers/power/s3c_adc_battery.c:435:1: warning: type defaults to ‘int’ in declaration of ‘MODULE_AUTHOR’ drivers/power/s3c_adc_battery.c:435:15: warning: function declaration isn’t a prototype drivers/power/s3c_adc_battery.c:436:20: error: expected declaration specifiers or ‘...’ before string constant drivers/power/s3c_adc_battery.c:436:1: warning: data definition has no type or storage class drivers/power/s3c_adc_battery.c:436:1: warning: type defaults to ‘int’ in declaration of ‘MODULE_DESCRIPTION’ drivers/power/s3c_adc_battery.c:436:20: warning: function declaration isn’t a prototype drivers/power/s3c_adc_battery.c:437:16: error: expected declaration specifiers or ‘...’ before string constant drivers/power/s3c_adc_battery.c:437:1: warning: data definition has no type or storage class drivers/power/s3c_adc_battery.c:437:1: warning: type defaults to ‘int’ in declaration of ‘MODULE_LICENSE’ drivers/power/s3c_adc_battery.c:437:16: warning: function declaration isn’t a prototype make[2]: *** [drivers/power/s3c_adc_battery.o] Error 1 Signed-off-by: Vasily Khoruzhick Signed-off-by: Ian Lartey Signed-off-by: Anton Vorontsov --- drivers/power/s3c_adc_battery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/s3c_adc_battery.c b/drivers/power/s3c_adc_battery.c index a675e31..d32d0d7 100644 --- a/drivers/power/s3c_adc_battery.c +++ b/drivers/power/s3c_adc_battery.c @@ -20,6 +20,7 @@ #include #include #include +#include #include -- cgit v1.1 From b095cd0a0ccdbc00c9fd99d90b22f8563687971f Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 12 Aug 2011 15:28:32 -0700 Subject: drm/i915: set GFX_MODE to pre-Ivybridge default value even on Ivybridge Prior to Ivybridge, the GFX_MODE would default to 0x800, meaning that MI_FLUSH would flush the TLBs in addition to the rest of the caches indicated in the MI_FLUSH command. However starting with Ivybridge, the register defaults to 0x2800 out of reset, meaning that to invalidate the TLB we need to use PIPE_CONTROL. Since we're not doing that yet, go back to the old default so things work. v2: don't forget to actually *clear* the new bit Reviewed-by: Eric Anholt Reviewed-by: Chris Wilson Tested-by: Kenneth Graunke Signed-off-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_reg.h | 4 ++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5baaef4..542453f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -375,6 +375,7 @@ # define MI_FLUSH_ENABLE (1 << 11) #define GFX_MODE 0x02520 +#define GFX_MODE_GEN7 0x0229c #define GFX_RUN_LIST_ENABLE (1<<15) #define GFX_TLB_INVALIDATE_ALWAYS (1<<13) #define GFX_SURFACE_FAULT_ENABLE (1<<12) @@ -382,6 +383,9 @@ #define GFX_PSMI_GRANULARITY (1<<10) #define GFX_PPGTT_ENABLE (1<<9) +#define GFX_MODE_ENABLE(bit) (((bit) << 16) | (bit)) +#define GFX_MODE_DISABLE(bit) (((bit) << 16) | (0)) + #define SCPD0 0x0209c /* 915+ only */ #define IER 0x020a0 #define IIR 0x020a4 diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 47b9b27..c30626e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -290,6 +290,10 @@ static int init_render_ring(struct intel_ring_buffer *ring) if (IS_GEN6(dev) || IS_GEN7(dev)) mode |= MI_FLUSH_ENABLE << 16 | MI_FLUSH_ENABLE; I915_WRITE(MI_MODE, mode); + if (IS_GEN7(dev)) + I915_WRITE(GFX_MODE_GEN7, + GFX_MODE_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) | + GFX_MODE_ENABLE(GFX_REPLAY_MODE)); } if (INTEL_INFO(dev)->gen >= 6) { -- cgit v1.1 From c956b753e706f24d18a026f8efa4df3b1919fcc9 Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Fri, 19 Aug 2011 16:59:39 -0600 Subject: OMAP: powerdomains: Make all powerdomain target states as ON at init Program all powerdomain target state as ON; this is to prevent domains from hitting low power states (if bootloader has target states set to something other than ON) and potentially even losing context while PM is not fully initialized, which can cause the system to crash. The PM late init code can then program the desired target state for all the power domains. Signed-off-by: Rajendra Nayak Signed-off-by: Santosh Shilimkar [paul@pwsan.com: dropped comment typo hunk; fixed comment indent and moved to kerneldoc; moved code to pwrdm_init(); changed pwrdm_init() argument name to prevent clash; cleaned up patch description] Signed-off-by: Paul Walmsley --- arch/arm/mach-omap2/powerdomain.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c index 9af0847..ef71fdd 100644 --- a/arch/arm/mach-omap2/powerdomain.c +++ b/arch/arm/mach-omap2/powerdomain.c @@ -195,28 +195,35 @@ static int _pwrdm_post_transition_cb(struct powerdomain *pwrdm, void *unused) /** * pwrdm_init - set up the powerdomain layer - * @pwrdm_list: array of struct powerdomain pointers to register + * @pwrdms: array of struct powerdomain pointers to register * @custom_funcs: func pointers for arch specific implementations * - * Loop through the array of powerdomains @pwrdm_list, registering all - * that are available on the current CPU. If pwrdm_list is supplied - * and not null, all of the referenced powerdomains will be - * registered. No return value. XXX pwrdm_list is not really a - * "list"; it is an array. Rename appropriately. + * Loop through the array of powerdomains @pwrdms, registering all + * that are available on the current CPU. Also, program all + * powerdomain target state as ON; this is to prevent domains from + * hitting low power states (if bootloader has target states set to + * something other than ON) and potentially even losing context while + * PM is not fully initialized. The PM late init code can then program + * the desired target state for all the power domains. No return + * value. */ -void pwrdm_init(struct powerdomain **pwrdm_list, struct pwrdm_ops *custom_funcs) +void pwrdm_init(struct powerdomain **pwrdms, struct pwrdm_ops *custom_funcs) { struct powerdomain **p = NULL; + struct powerdomain *temp_p; if (!custom_funcs) WARN(1, "powerdomain: No custom pwrdm functions registered\n"); else arch_pwrdm = custom_funcs; - if (pwrdm_list) { - for (p = pwrdm_list; *p; p++) + if (pwrdms) { + for (p = pwrdms; *p; p++) _pwrdm_register(*p); } + + list_for_each_entry(temp_p, &pwrdm_list, node) + pwrdm_set_next_pwrst(temp_p, PWRDM_POWER_ON); } /** -- cgit v1.1 From b1cbdb00da2ac00eb67fe277e563ff1f5093b4ba Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Fri, 19 Aug 2011 16:59:39 -0600 Subject: OMAP: clockdomain: Wait for powerdomain to be ON when using clockdomain force wakeup While using clockdomain force wakeup method, not waiting for powerdomain to be effectively ON may end up locking the clockdomain FSM until a next wakeup event occurs. One such issue was seen on OMAP4430, where L4_PER was periodically getting stuck in in-transition state when transitioning from from OSWR to ON. This issue was reported and investigated by Patrick Titiano Signed-off-by: Santosh Shilimkar Signed-off-by: Rajendra Nayak Reported-by: Patrick Titiano Cc: Kevin Hilman Cc: Benoit Cousson Cc: Paul Walmsley [paul@pwsan.com: updated to apply; added transition wait on clkdm_deny_idle(); remove two superfluous pwrdm_wait_transition() calls] Signed-off-by: Paul Walmsley --- arch/arm/mach-omap2/clockdomain.c | 2 ++ arch/arm/mach-omap2/pm.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-omap2/clockdomain.c b/arch/arm/mach-omap2/clockdomain.c index ab7db08..8f08906 100644 --- a/arch/arm/mach-omap2/clockdomain.c +++ b/arch/arm/mach-omap2/clockdomain.c @@ -747,6 +747,7 @@ int clkdm_wakeup(struct clockdomain *clkdm) spin_lock_irqsave(&clkdm->lock, flags); clkdm->_flags &= ~_CLKDM_FLAG_HWSUP_ENABLED; ret = arch_clkdm->clkdm_wakeup(clkdm); + ret |= pwrdm_state_switch(clkdm->pwrdm.ptr); spin_unlock_irqrestore(&clkdm->lock, flags); return ret; } @@ -818,6 +819,7 @@ void clkdm_deny_idle(struct clockdomain *clkdm) spin_lock_irqsave(&clkdm->lock, flags); clkdm->_flags &= ~_CLKDM_FLAG_HWSUP_ENABLED; arch_clkdm->clkdm_deny_idle(clkdm); + pwrdm_state_switch(clkdm->pwrdm.ptr); spin_unlock_irqrestore(&clkdm->lock, flags); } diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c index 3feb359..472bf22 100644 --- a/arch/arm/mach-omap2/pm.c +++ b/arch/arm/mach-omap2/pm.c @@ -130,7 +130,6 @@ int omap_set_pwrdm_state(struct powerdomain *pwrdm, u32 state) } else { hwsup = clkdm_in_hwsup(pwrdm->pwrdm_clkdms[0]); clkdm_wakeup(pwrdm->pwrdm_clkdms[0]); - pwrdm_wait_transition(pwrdm); sleep_switch = FORCEWAKEUP_SWITCH; } } @@ -156,7 +155,6 @@ int omap_set_pwrdm_state(struct powerdomain *pwrdm, u32 state) return ret; } - pwrdm_wait_transition(pwrdm); pwrdm_state_switch(pwrdm); err: return ret; -- cgit v1.1 From 9c5f560173a466582d91bb06f4e3d2bafb0fee5c Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Fri, 19 Aug 2011 16:59:56 -0600 Subject: OMAP4: clock: re-enable previous clockdomain enable/disable sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 665d001338b494d6d62810aa99b4c0fa1a0884b9 ("OMAP2+: hwmod: Follow the recommended PRCM module enable sequence"), device drivers for OMAP IP blocks that do not use runtime PM can cause oopses or kernel instability[1][2]. This is because those non-runtime PM drivers do not use the hwmod code, which implements the correct IP block enable and disable sequence. Several options for dealing with this problem have been proposed: 1. Add a new field to the OMAP struct clk to mark clocks that are currently used by non-runtime PM drivers. Modify the clock code to use the old clockdomain sequence for these marked clocks. As drivers are converted to use runtime PM, remove the annotation from the clocks. 2. Similar to #1, but associate the flag with the struct omap_clk instead. 3. Add IDLEST wait support to the OMAP4 clock code, similar to the way it is implemented for OMAP2/3, and enable it in each struct clk currently used by non-runtime PM drivers. As drivers are converted to use runtime PM, remove the annotation from the clocks. 4. Do nothing; leave the problem to those responsible for the unconverted drivers. 5. Re-enable clock-based clockdomain control in the OMAP4 clock code. This would revert back to the behavior of Linux 3.0, simply with a slightly longer module enable/disable latency. Unfortunately, no approach seemed particularly good. Options 1 through 3 seemed unwise due to the following reasons: A. The OMAP struct clks are intended primarily to describe hardware clock nodes, and the intention is that no driver-specific data should be stored there (applies to #1) B. The resulting patch would have been quite large for the -rc series (applies to #1, #2, #3) C. The patch would have been a new, yet temporary hack; and similar fixes have drawn negative comments in the recent past (see for example [3]) Option 4 is undesirable because commit 665d001338b494d6d62810aa99b4c0fa1a0884b9 ("OMAP2+: hwmod: Follow the recommended PRCM module enable sequence") has resulted in a less stable kernel; and kernel stability is more important than OMAP4 power management. Option 5 is the approach taken in this patch. This seemed to be the least intrusive approach for 3.1-rc. The approach in this patch was originally proposed by Ohad Ben-Cohen . I'm simply writing the commit message and passing it along. ... Thanks to Luciano Coelho for reporting the problem. Thanks to Ohad Ben-Cohen for tracking the problem down, generating a temporary workaround, and proposing a patch to deal with the problem. Thanks to Rajendra Nayak for proposing another patch to deal with the problem. Thanks to Felipe Balbi for comments. 1. Coelho, Luciano . _Re: Oops on ehci_hcd when booting 3.0.0-rc2 on panda_. Tue, 09 Aug 2011 14:26:08 +0300. Posted to the mailing list. Available from (among others) http://www.spinics.net/linux/lists/linux-omap/msg55213.html 2. Munegowda, Keshava . _Re: Oops on ehci_hcd when booting 3.0.0-rc2 on panda_. Thu, 11 Aug 2011 13:51:05 +0530. Posted to the mailing list. Available from (among others) http://www.spinics.net/linux/lists/linux-omap/msg55371.html 3. King, Russell . _Re: [PATCH 5/8] OMAP4: PM: TEMP: Prevent l3init from idling/force sleep_. Thu, 23 Jun 2011 16:22:49 +0100. Posted to the mailing list. Available from (among others) http://www.mail-archive.com/linux-omap@vger.kernel.org/msg51392.html Signed-off-by: Paul Walmsley Cc: Luciano Coelho Cc: Ohad Ben-Cohen Cc: Rajendra Nayak Cc: Benoît Cousson Acked-by: Santosh Shilimkar --- arch/arm/mach-omap2/clock44xx_data.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/clock44xx_data.c b/arch/arm/mach-omap2/clock44xx_data.c index 2af0e3f0..a3a1827 100644 --- a/arch/arm/mach-omap2/clock44xx_data.c +++ b/arch/arm/mach-omap2/clock44xx_data.c @@ -3379,7 +3379,13 @@ int __init omap4xxx_clk_init(void) } clk_init(&omap2_clk_functions); - omap2_clk_disable_clkdm_control(); + + /* + * Must stay commented until all OMAP SoC drivers are + * converted to runtime PM, or drivers may start crashing + * + * omap2_clk_disable_clkdm_control(); + */ for (c = omap44xx_clks; c < omap44xx_clks + ARRAY_SIZE(omap44xx_clks); c++) -- cgit v1.1 From dccaf33fa37a1bc5d651baeb3bfeb6becb86597b Mon Sep 17 00:00:00 2001 From: Jiaying Zhang Date: Fri, 19 Aug 2011 19:13:32 -0400 Subject: ext4: flush any pending end_io requests before DIO reads w/dioread_nolock There is a race between ext4 buffer write and direct_IO read with dioread_nolock mount option enabled. The problem is that we clear PageWriteback flag during end_io time but will do uninitialized-to-initialized extent conversion later with dioread_nolock. If an O_direct read request comes in during this period, ext4 will return zero instead of the recently written data. This patch checks whether there are any pending uninitialized-to-initialized extent conversion requests before doing O_direct read to close the race. Note that this is just a bandaid fix. The fundamental issue is that we clear PageWriteback flag before we really complete an IO, which is problem-prone. To fix the fundamental issue, we may need to implement an extent tree cache that we can use to look up pending to-be-converted extents. Signed-off-by: Jiaying Zhang Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/indirect.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index b8602cd..0962642 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -800,12 +800,17 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, } retry: - if (rw == READ && ext4_should_dioread_nolock(inode)) + if (rw == READ && ext4_should_dioread_nolock(inode)) { + if (unlikely(!list_empty(&ei->i_completed_io_list))) { + mutex_lock(&inode->i_mutex); + ext4_flush_completed_IO(inode); + mutex_unlock(&inode->i_mutex); + } ret = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, ext4_get_block, NULL, NULL, 0); - else { + } else { ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, ext4_get_block); -- cgit v1.1 From b6acf013bdc6f6ff9643030add85832d44034a28 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 20 Aug 2011 09:14:45 +0200 Subject: ALSA: hda - Don't spew too many ELD errors Currently HD-audio driver shows the all error ELD byte as an error in the kernel message. This is annoying when the video driver doesn't set the correct ELD from the beginning. e.g. radeon sends a zero-byte data, but we still check ELD with the fixed 128 byte as a workaround for some broken devices, it spews 128-times errors. For avoiding this, the driver aborts reading when the first byte is invalid. In such a case, the whole data is certainly invalid. Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_eld.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/sound/pci/hda/hda_eld.c b/sound/pci/hda/hda_eld.c index 28ce17d..c34f730 100644 --- a/sound/pci/hda/hda_eld.c +++ b/sound/pci/hda/hda_eld.c @@ -144,25 +144,17 @@ static int cea_sampling_frequencies[8] = { SNDRV_PCM_RATE_192000, /* 7: 192000Hz */ }; -static unsigned char hdmi_get_eld_byte(struct hda_codec *codec, hda_nid_t nid, +static unsigned int hdmi_get_eld_data(struct hda_codec *codec, hda_nid_t nid, int byte_index) { unsigned int val; val = snd_hda_codec_read(codec, nid, 0, AC_VERB_GET_HDMI_ELDD, byte_index); - #ifdef BE_PARANOID printk(KERN_INFO "HDMI: ELD data byte %d: 0x%x\n", byte_index, val); #endif - - if ((val & AC_ELDD_ELD_VALID) == 0) { - snd_printd(KERN_INFO "HDMI: invalid ELD data byte %d\n", - byte_index); - val = 0; - } - - return val & AC_ELDD_ELD_DATA; + return val; } #define GRAB_BITS(buf, byte, lowbit, bits) \ @@ -344,11 +336,26 @@ int snd_hdmi_get_eld(struct hdmi_eld *eld, if (!buf) return -ENOMEM; - for (i = 0; i < size; i++) - buf[i] = hdmi_get_eld_byte(codec, nid, i); + for (i = 0; i < size; i++) { + unsigned int val = hdmi_get_eld_data(codec, nid, i); + if (!(val & AC_ELDD_ELD_VALID)) { + if (!i) { + snd_printd(KERN_INFO + "HDMI: invalid ELD data\n"); + ret = -EINVAL; + goto error; + } + snd_printd(KERN_INFO + "HDMI: invalid ELD data byte %d\n", i); + val = 0; + } else + val &= AC_ELDD_ELD_DATA; + buf[i] = val; + } ret = hdmi_update_eld(eld, buf, size); +error: kfree(buf); return ret; } -- cgit v1.1 From 1b004d03d8670bdd871e0f297ed20bc510e404de Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 20 Aug 2011 09:19:59 +0200 Subject: ALSA: hda - Fix error check from snd_hda_get_conn_index() in patch_cirrus.c snd_hda_get_conn_index() returns a negative value while the current code stores it in an unsigned int. It must be stored in a signed integer. Reported-by: Jesper Juhl Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_cirrus.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 47d6ffc..d6c93d9 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -375,7 +375,7 @@ static int is_ext_mic(struct hda_codec *codec, unsigned int idx) static hda_nid_t get_adc(struct hda_codec *codec, hda_nid_t pin, unsigned int *idxp) { - int i; + int i, idx; hda_nid_t nid; nid = codec->start_nid; @@ -384,9 +384,11 @@ static hda_nid_t get_adc(struct hda_codec *codec, hda_nid_t pin, type = get_wcaps_type(get_wcaps(codec, nid)); if (type != AC_WID_AUD_IN) continue; - *idxp = snd_hda_get_conn_index(codec, nid, pin, false); - if (*idxp >= 0) + idx = snd_hda_get_conn_index(codec, nid, pin, false); + if (idx >= 0) { + *idxp = idx; return nid; + } } return 0; } -- cgit v1.1 From de75577c8c3ab733f808c65e1a9d55882efde68e Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 20 Aug 2011 08:12:41 +0200 Subject: ALSA: sound/aoa/fabrics/layout.c: remove unneeded kfree The label outnodev is only used when kzalloc has not yet taken place or has failed, so there is no need for the call for kfree under this label. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier x; expression E1!=0,E2,E3,E4; statement S; iterator I; @@ ( if (...) { ... when != kfree(x) when != x = E3 when != E3 = x * return ...; } ... when != x = E2 when != I(...,x,...) S if (...) { ... when != x = E4 kfree(x); ... return ...; } ) // Signed-off-by: Julia Lawall Signed-off-by: Takashi Iwai --- sound/aoa/fabrics/layout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/aoa/fabrics/layout.c b/sound/aoa/fabrics/layout.c index 3fd1a7e..552b97a 100644 --- a/sound/aoa/fabrics/layout.c +++ b/sound/aoa/fabrics/layout.c @@ -1073,10 +1073,10 @@ static int aoa_fabric_layout_probe(struct soundbus_dev *sdev) sdev->pcmid = -1; list_del(&ldev->list); layouts_list_items--; + kfree(ldev); outnodev: of_node_put(sound); layout_device = NULL; - kfree(ldev); return -ENODEV; } -- cgit v1.1 From fbe5e29ec1886967255e76946aaf537b8cc9b81e Mon Sep 17 00:00:00 2001 From: Daniel Schwierzeck Date: Fri, 19 Aug 2011 12:04:20 +0000 Subject: atm: br2684: Fix oops due to skb->dev being NULL This oops have been already fixed with commit 27141666b69f535a4d63d7bc6d9e84ee5032f82a atm: [br2684] Fix oops due to skb->dev being NULL It happens that if a packet arrives in a VC between the call to open it on the hardware and the call to change the backend to br2684, br2684_regvcc processes the packet and oopses dereferencing skb->dev because it is NULL before the call to br2684_push(). but have been introduced again with commit b6211ae7f2e56837c6a4849316396d1535606e90 atm: Use SKB queue and list helpers instead of doing it by-hand. Signed-off-by: Daniel Schwierzeck Signed-off-by: David S. Miller --- net/atm/br2684.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 52cfd0c..d07223c 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -558,12 +558,13 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) spin_unlock_irqrestore(&rq->lock, flags); skb_queue_walk_safe(&queue, skb, tmp) { - struct net_device *dev = skb->dev; + struct net_device *dev; + + br2684_push(atmvcc, skb); + dev = skb->dev; dev->stats.rx_bytes -= skb->len; dev->stats.rx_packets--; - - br2684_push(atmvcc, skb); } /* initialize netdev carrier state */ -- cgit v1.1 From d70d43d7d719ab709af7df109e706e804fe21834 Mon Sep 17 00:00:00 2001 From: Jiejing Zhang Date: Sat, 20 Aug 2011 14:38:01 -0700 Subject: Input: max11801_ts - correct license statement The original license statement was confusing since it was unclear if the license was pure GPLv2 or GPLv2+ and did not match the license of the driver max11801_ts was derived from. The license is GPLv2+. Signed-off-by: Jiejing Zhang Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/max11801_ts.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/max11801_ts.c b/drivers/input/touchscreen/max11801_ts.c index 4f2713d..4627fe5 100644 --- a/drivers/input/touchscreen/max11801_ts.c +++ b/drivers/input/touchscreen/max11801_ts.c @@ -9,7 +9,8 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License. + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. */ /* -- cgit v1.1 From 5598473a5b40c47a8c5349dd2c2630797169cf1a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 20 Aug 2011 17:14:54 -0700 Subject: sparc: Allow handling signals when stack is corrupted. If we can't push the pending register windows onto the user's stack, we disallow signal delivery even if the signal would be delivered on a valid seperate signal stack. Add a register window save area in the signal frame, and store any unsavable windows there. On sigreturn, if any windows are still queued up in the signal frame, try to push them back onto the stack and if that fails we kill the process immediately. This allows the debug/tst-longjmp_chk2 glibc test case to pass. Signed-off-by: David S. Miller --- arch/sparc/include/asm/sigcontext.h | 14 +++ arch/sparc/kernel/Makefile | 1 + arch/sparc/kernel/signal32.c | 184 ++++++++++++++++++++---------------- arch/sparc/kernel/signal_32.c | 172 ++++++++++++++++----------------- arch/sparc/kernel/signal_64.c | 108 +++++++++------------ arch/sparc/kernel/sigutil.h | 9 ++ arch/sparc/kernel/sigutil_32.c | 120 +++++++++++++++++++++++ arch/sparc/kernel/sigutil_64.c | 93 ++++++++++++++++++ 8 files changed, 468 insertions(+), 233 deletions(-) create mode 100644 arch/sparc/kernel/sigutil.h create mode 100644 arch/sparc/kernel/sigutil_32.c create mode 100644 arch/sparc/kernel/sigutil_64.c diff --git a/arch/sparc/include/asm/sigcontext.h b/arch/sparc/include/asm/sigcontext.h index a1607d18..69914d7 100644 --- a/arch/sparc/include/asm/sigcontext.h +++ b/arch/sparc/include/asm/sigcontext.h @@ -45,6 +45,19 @@ typedef struct { int si_mask; } __siginfo32_t; +#define __SIGC_MAXWIN 7 + +typedef struct { + unsigned long locals[8]; + unsigned long ins[8]; +} __siginfo_reg_window; + +typedef struct { + int wsaved; + __siginfo_reg_window reg_window[__SIGC_MAXWIN]; + unsigned long rwbuf_stkptrs[__SIGC_MAXWIN]; +} __siginfo_rwin_t; + #ifdef CONFIG_SPARC64 typedef struct { unsigned int si_float_regs [64]; @@ -73,6 +86,7 @@ struct sigcontext { unsigned long ss_size; } sigc_stack; unsigned long sigc_mask; + __siginfo_rwin_t * sigc_rwin_save; }; #else diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index b90b4a1..cb85458 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_SPARC32) += sun4m_irq.o sun4c_irq.o sun4d_irq.o obj-y += process_$(BITS).o obj-y += signal_$(BITS).o +obj-y += sigutil_$(BITS).o obj-$(CONFIG_SPARC32) += ioport.o obj-y += setup_$(BITS).o obj-y += idprom.o diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index 75fad42..1ba95af 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -29,6 +29,8 @@ #include #include +#include "sigutil.h" + #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) /* This magic should be in g_upper[0] for all upper parts @@ -44,14 +46,14 @@ typedef struct { struct signal_frame32 { struct sparc_stackf32 ss; __siginfo32_t info; - /* __siginfo_fpu32_t * */ u32 fpu_save; + /* __siginfo_fpu_t * */ u32 fpu_save; unsigned int insns[2]; unsigned int extramask[_COMPAT_NSIG_WORDS - 1]; unsigned int extra_size; /* Should be sizeof(siginfo_extra_v8plus_t) */ /* Only valid if (info.si_regs.psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS */ siginfo_extra_v8plus_t v8plus; - __siginfo_fpu_t fpu_state; -}; + /* __siginfo_rwin_t * */u32 rwin_save; +} __attribute__((aligned(8))); typedef struct compat_siginfo{ int si_signo; @@ -110,18 +112,14 @@ struct rt_signal_frame32 { compat_siginfo_t info; struct pt_regs32 regs; compat_sigset_t mask; - /* __siginfo_fpu32_t * */ u32 fpu_save; + /* __siginfo_fpu_t * */ u32 fpu_save; unsigned int insns[2]; stack_t32 stack; unsigned int extra_size; /* Should be sizeof(siginfo_extra_v8plus_t) */ /* Only valid if (regs.psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS */ siginfo_extra_v8plus_t v8plus; - __siginfo_fpu_t fpu_state; -}; - -/* Align macros */ -#define SF_ALIGNEDSZ (((sizeof(struct signal_frame32) + 15) & (~15))) -#define RT_ALIGNEDSZ (((sizeof(struct rt_signal_frame32) + 15) & (~15))) + /* __siginfo_rwin_t * */u32 rwin_save; +} __attribute__((aligned(8))); int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) { @@ -192,30 +190,13 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) return 0; } -static int restore_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) -{ - unsigned long *fpregs = current_thread_info()->fpregs; - unsigned long fprs; - int err; - - err = __get_user(fprs, &fpu->si_fprs); - fprs_write(0); - regs->tstate &= ~TSTATE_PEF; - if (fprs & FPRS_DL) - err |= copy_from_user(fpregs, &fpu->si_float_regs[0], (sizeof(unsigned int) * 32)); - if (fprs & FPRS_DU) - err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32], (sizeof(unsigned int) * 32)); - err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr); - err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr); - current_thread_info()->fpsaved[0] |= fprs; - return err; -} - void do_sigreturn32(struct pt_regs *regs) { struct signal_frame32 __user *sf; + compat_uptr_t fpu_save; + compat_uptr_t rwin_save; unsigned int psr; - unsigned pc, npc, fpu_save; + unsigned pc, npc; sigset_t set; unsigned seta[_COMPAT_NSIG_WORDS]; int err, i; @@ -273,8 +254,13 @@ void do_sigreturn32(struct pt_regs *regs) pt_regs_clear_syscall(regs); err |= __get_user(fpu_save, &sf->fpu_save); - if (fpu_save) - err |= restore_fpu_state32(regs, &sf->fpu_state); + if (!err && fpu_save) + err |= restore_fpu_state(regs, compat_ptr(fpu_save)); + err |= __get_user(rwin_save, &sf->rwin_save); + if (!err && rwin_save) { + if (restore_rwin_state(compat_ptr(rwin_save))) + goto segv; + } err |= __get_user(seta[0], &sf->info.si_mask); err |= copy_from_user(seta+1, &sf->extramask, (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int)); @@ -300,7 +286,9 @@ segv: asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) { struct rt_signal_frame32 __user *sf; - unsigned int psr, pc, npc, fpu_save, u_ss_sp; + unsigned int psr, pc, npc, u_ss_sp; + compat_uptr_t fpu_save; + compat_uptr_t rwin_save; mm_segment_t old_fs; sigset_t set; compat_sigset_t seta; @@ -359,8 +347,8 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) pt_regs_clear_syscall(regs); err |= __get_user(fpu_save, &sf->fpu_save); - if (fpu_save) - err |= restore_fpu_state32(regs, &sf->fpu_state); + if (!err && fpu_save) + err |= restore_fpu_state(regs, compat_ptr(fpu_save)); err |= copy_from_user(&seta, &sf->mask, sizeof(compat_sigset_t)); err |= __get_user(u_ss_sp, &sf->stack.ss_sp); st.ss_sp = compat_ptr(u_ss_sp); @@ -376,6 +364,12 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) do_sigaltstack((stack_t __user *) &st, NULL, (unsigned long)sf); set_fs(old_fs); + err |= __get_user(rwin_save, &sf->rwin_save); + if (!err && rwin_save) { + if (restore_rwin_state(compat_ptr(rwin_save))) + goto segv; + } + switch (_NSIG_WORDS) { case 4: set.sig[3] = seta.sig[6] + (((long)seta.sig[7]) << 32); case 3: set.sig[2] = seta.sig[4] + (((long)seta.sig[5]) << 32); @@ -433,26 +427,6 @@ static void __user *get_sigframe(struct sigaction *sa, struct pt_regs *regs, uns return (void __user *) sp; } -static int save_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) -{ - unsigned long *fpregs = current_thread_info()->fpregs; - unsigned long fprs; - int err = 0; - - fprs = current_thread_info()->fpsaved[0]; - if (fprs & FPRS_DL) - err |= copy_to_user(&fpu->si_float_regs[0], fpregs, - (sizeof(unsigned int) * 32)); - if (fprs & FPRS_DU) - err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16, - (sizeof(unsigned int) * 32)); - err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr); - err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr); - err |= __put_user(fprs, &fpu->si_fprs); - - return err; -} - /* The I-cache flush instruction only works in the primary ASI, which * right now is the nucleus, aka. kernel space. * @@ -515,18 +489,23 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, int signo, sigset_t *oldset) { struct signal_frame32 __user *sf; + int i, err, wsaved; + void __user *tail; int sigframe_size; u32 psr; - int i, err; unsigned int seta[_COMPAT_NSIG_WORDS]; /* 1. Make sure everything is clean */ synchronize_user_stack(); save_and_clear_fpu(); - sigframe_size = SF_ALIGNEDSZ; - if (!(current_thread_info()->fpsaved[0] & FPRS_FEF)) - sigframe_size -= sizeof(__siginfo_fpu_t); + wsaved = get_thread_wsaved(); + + sigframe_size = sizeof(*sf); + if (current_thread_info()->fpsaved[0] & FPRS_FEF) + sigframe_size += sizeof(__siginfo_fpu_t); + if (wsaved) + sigframe_size += sizeof(__siginfo_rwin_t); sf = (struct signal_frame32 __user *) get_sigframe(&ka->sa, regs, sigframe_size); @@ -534,8 +513,7 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, if (invalid_frame_pointer(sf, sigframe_size)) goto sigill; - if (get_thread_wsaved() != 0) - goto sigill; + tail = (sf + 1); /* 2. Save the current process state */ if (test_thread_flag(TIF_32BIT)) { @@ -560,11 +538,22 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, &sf->v8plus.asi); if (psr & PSR_EF) { - err |= save_fpu_state32(regs, &sf->fpu_state); - err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save); + __siginfo_fpu_t __user *fp = tail; + tail += sizeof(*fp); + err |= save_fpu_state(regs, fp); + err |= __put_user((u64)fp, &sf->fpu_save); } else { err |= __put_user(0, &sf->fpu_save); } + if (wsaved) { + __siginfo_rwin_t __user *rwp = tail; + tail += sizeof(*rwp); + err |= save_rwin_state(wsaved, rwp); + err |= __put_user((u64)rwp, &sf->rwin_save); + set_thread_wsaved(0); + } else { + err |= __put_user(0, &sf->rwin_save); + } switch (_NSIG_WORDS) { case 4: seta[7] = (oldset->sig[3] >> 32); @@ -580,10 +569,21 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, err |= __copy_to_user(sf->extramask, seta + 1, (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int)); - err |= copy_in_user((u32 __user *)sf, - (u32 __user *)(regs->u_regs[UREG_FP]), - sizeof(struct reg_window32)); - + if (!wsaved) { + err |= copy_in_user((u32 __user *)sf, + (u32 __user *)(regs->u_regs[UREG_FP]), + sizeof(struct reg_window32)); + } else { + struct reg_window *rp; + + rp = ¤t_thread_info()->reg_window[wsaved - 1]; + for (i = 0; i < 8; i++) + err |= __put_user(rp->locals[i], &sf->ss.locals[i]); + for (i = 0; i < 6; i++) + err |= __put_user(rp->ins[i], &sf->ss.ins[i]); + err |= __put_user(rp->ins[6], &sf->ss.fp); + err |= __put_user(rp->ins[7], &sf->ss.callers_pc); + } if (err) goto sigsegv; @@ -613,7 +613,6 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, err |= __put_user(0x91d02010, &sf->insns[1]); /*t 0x10*/ if (err) goto sigsegv; - flush_signal_insns(address); } return 0; @@ -632,18 +631,23 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs, siginfo_t *info) { struct rt_signal_frame32 __user *sf; + int i, err, wsaved; + void __user *tail; int sigframe_size; u32 psr; - int i, err; compat_sigset_t seta; /* 1. Make sure everything is clean */ synchronize_user_stack(); save_and_clear_fpu(); - sigframe_size = RT_ALIGNEDSZ; - if (!(current_thread_info()->fpsaved[0] & FPRS_FEF)) - sigframe_size -= sizeof(__siginfo_fpu_t); + wsaved = get_thread_wsaved(); + + sigframe_size = sizeof(*sf); + if (current_thread_info()->fpsaved[0] & FPRS_FEF) + sigframe_size += sizeof(__siginfo_fpu_t); + if (wsaved) + sigframe_size += sizeof(__siginfo_rwin_t); sf = (struct rt_signal_frame32 __user *) get_sigframe(&ka->sa, regs, sigframe_size); @@ -651,8 +655,7 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs, if (invalid_frame_pointer(sf, sigframe_size)) goto sigill; - if (get_thread_wsaved() != 0) - goto sigill; + tail = (sf + 1); /* 2. Save the current process state */ if (test_thread_flag(TIF_32BIT)) { @@ -677,11 +680,22 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs, &sf->v8plus.asi); if (psr & PSR_EF) { - err |= save_fpu_state32(regs, &sf->fpu_state); - err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save); + __siginfo_fpu_t __user *fp = tail; + tail += sizeof(*fp); + err |= save_fpu_state(regs, fp); + err |= __put_user((u64)fp, &sf->fpu_save); } else { err |= __put_user(0, &sf->fpu_save); } + if (wsaved) { + __siginfo_rwin_t __user *rwp = tail; + tail += sizeof(*rwp); + err |= save_rwin_state(wsaved, rwp); + err |= __put_user((u64)rwp, &sf->rwin_save); + set_thread_wsaved(0); + } else { + err |= __put_user(0, &sf->rwin_save); + } /* Update the siginfo structure. */ err |= copy_siginfo_to_user32(&sf->info, info); @@ -703,9 +717,21 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs, } err |= __copy_to_user(&sf->mask, &seta, sizeof(compat_sigset_t)); - err |= copy_in_user((u32 __user *)sf, - (u32 __user *)(regs->u_regs[UREG_FP]), - sizeof(struct reg_window32)); + if (!wsaved) { + err |= copy_in_user((u32 __user *)sf, + (u32 __user *)(regs->u_regs[UREG_FP]), + sizeof(struct reg_window32)); + } else { + struct reg_window *rp; + + rp = ¤t_thread_info()->reg_window[wsaved - 1]; + for (i = 0; i < 8; i++) + err |= __put_user(rp->locals[i], &sf->ss.locals[i]); + for (i = 0; i < 6; i++) + err |= __put_user(rp->ins[i], &sf->ss.ins[i]); + err |= __put_user(rp->ins[6], &sf->ss.fp); + err |= __put_user(rp->ins[7], &sf->ss.callers_pc); + } if (err) goto sigsegv; diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index 5e5c5fd..04ede8f 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -26,6 +26,8 @@ #include #include /* flush_sig_insns */ +#include "sigutil.h" + #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) extern void fpsave(unsigned long *fpregs, unsigned long *fsr, @@ -39,8 +41,8 @@ struct signal_frame { unsigned long insns[2] __attribute__ ((aligned (8))); unsigned int extramask[_NSIG_WORDS - 1]; unsigned int extra_size; /* Should be 0 */ - __siginfo_fpu_t fpu_state; -}; + __siginfo_rwin_t __user *rwin_save; +} __attribute__((aligned(8))); struct rt_signal_frame { struct sparc_stackf ss; @@ -51,8 +53,8 @@ struct rt_signal_frame { unsigned int insns[2]; stack_t stack; unsigned int extra_size; /* Should be 0 */ - __siginfo_fpu_t fpu_state; -}; + __siginfo_rwin_t __user *rwin_save; +} __attribute__((aligned(8))); /* Align macros */ #define SF_ALIGNEDSZ (((sizeof(struct signal_frame) + 7) & (~7))) @@ -79,43 +81,13 @@ asmlinkage int sys_sigsuspend(old_sigset_t set) return _sigpause_common(set); } -static inline int -restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) -{ - int err; -#ifdef CONFIG_SMP - if (test_tsk_thread_flag(current, TIF_USEDFPU)) - regs->psr &= ~PSR_EF; -#else - if (current == last_task_used_math) { - last_task_used_math = NULL; - regs->psr &= ~PSR_EF; - } -#endif - set_used_math(); - clear_tsk_thread_flag(current, TIF_USEDFPU); - - if (!access_ok(VERIFY_READ, fpu, sizeof(*fpu))) - return -EFAULT; - - err = __copy_from_user(¤t->thread.float_regs[0], &fpu->si_float_regs[0], - (sizeof(unsigned long) * 32)); - err |= __get_user(current->thread.fsr, &fpu->si_fsr); - err |= __get_user(current->thread.fpqdepth, &fpu->si_fpqdepth); - if (current->thread.fpqdepth != 0) - err |= __copy_from_user(¤t->thread.fpqueue[0], - &fpu->si_fpqueue[0], - ((sizeof(unsigned long) + - (sizeof(unsigned long *)))*16)); - return err; -} - asmlinkage void do_sigreturn(struct pt_regs *regs) { struct signal_frame __user *sf; unsigned long up_psr, pc, npc; sigset_t set; __siginfo_fpu_t __user *fpu_save; + __siginfo_rwin_t __user *rwin_save; int err; /* Always make any pending restarted system calls return -EINTR */ @@ -150,9 +122,11 @@ asmlinkage void do_sigreturn(struct pt_regs *regs) pt_regs_clear_syscall(regs); err |= __get_user(fpu_save, &sf->fpu_save); - if (fpu_save) err |= restore_fpu_state(regs, fpu_save); + err |= __get_user(rwin_save, &sf->rwin_save); + if (rwin_save) + err |= restore_rwin_state(rwin_save); /* This is pretty much atomic, no amount locking would prevent * the races which exist anyways. @@ -180,6 +154,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs) struct rt_signal_frame __user *sf; unsigned int psr, pc, npc; __siginfo_fpu_t __user *fpu_save; + __siginfo_rwin_t __user *rwin_save; mm_segment_t old_fs; sigset_t set; stack_t st; @@ -207,8 +182,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs) pt_regs_clear_syscall(regs); err |= __get_user(fpu_save, &sf->fpu_save); - - if (fpu_save) + if (!err && fpu_save) err |= restore_fpu_state(regs, fpu_save); err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t)); @@ -228,6 +202,12 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs) do_sigaltstack((const stack_t __user *) &st, NULL, (unsigned long)sf); set_fs(old_fs); + err |= __get_user(rwin_save, &sf->rwin_save); + if (!err && rwin_save) { + if (restore_rwin_state(rwin_save)) + goto segv; + } + sigdelsetmask(&set, ~_BLOCKABLE); spin_lock_irq(¤t->sighand->siglock); current->blocked = set; @@ -280,53 +260,23 @@ static inline void __user *get_sigframe(struct sigaction *sa, struct pt_regs *re return (void __user *) sp; } -static inline int -save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) -{ - int err = 0; -#ifdef CONFIG_SMP - if (test_tsk_thread_flag(current, TIF_USEDFPU)) { - put_psr(get_psr() | PSR_EF); - fpsave(¤t->thread.float_regs[0], ¤t->thread.fsr, - ¤t->thread.fpqueue[0], ¤t->thread.fpqdepth); - regs->psr &= ~(PSR_EF); - clear_tsk_thread_flag(current, TIF_USEDFPU); - } -#else - if (current == last_task_used_math) { - put_psr(get_psr() | PSR_EF); - fpsave(¤t->thread.float_regs[0], ¤t->thread.fsr, - ¤t->thread.fpqueue[0], ¤t->thread.fpqdepth); - last_task_used_math = NULL; - regs->psr &= ~(PSR_EF); - } -#endif - err |= __copy_to_user(&fpu->si_float_regs[0], - ¤t->thread.float_regs[0], - (sizeof(unsigned long) * 32)); - err |= __put_user(current->thread.fsr, &fpu->si_fsr); - err |= __put_user(current->thread.fpqdepth, &fpu->si_fpqdepth); - if (current->thread.fpqdepth != 0) - err |= __copy_to_user(&fpu->si_fpqueue[0], - ¤t->thread.fpqueue[0], - ((sizeof(unsigned long) + - (sizeof(unsigned long *)))*16)); - clear_used_math(); - return err; -} - static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs, int signo, sigset_t *oldset) { struct signal_frame __user *sf; - int sigframe_size, err; + int sigframe_size, err, wsaved; + void __user *tail; /* 1. Make sure everything is clean */ synchronize_user_stack(); - sigframe_size = SF_ALIGNEDSZ; - if (!used_math()) - sigframe_size -= sizeof(__siginfo_fpu_t); + wsaved = current_thread_info()->w_saved; + + sigframe_size = sizeof(*sf); + if (used_math()) + sigframe_size += sizeof(__siginfo_fpu_t); + if (wsaved) + sigframe_size += sizeof(__siginfo_rwin_t); sf = (struct signal_frame __user *) get_sigframe(&ka->sa, regs, sigframe_size); @@ -334,8 +284,7 @@ static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs, if (invalid_frame_pointer(sf, sigframe_size)) goto sigill_and_return; - if (current_thread_info()->w_saved != 0) - goto sigill_and_return; + tail = sf + 1; /* 2. Save the current process state */ err = __copy_to_user(&sf->info.si_regs, regs, sizeof(struct pt_regs)); @@ -343,17 +292,34 @@ static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs, err |= __put_user(0, &sf->extra_size); if (used_math()) { - err |= save_fpu_state(regs, &sf->fpu_state); - err |= __put_user(&sf->fpu_state, &sf->fpu_save); + __siginfo_fpu_t __user *fp = tail; + tail += sizeof(*fp); + err |= save_fpu_state(regs, fp); + err |= __put_user(fp, &sf->fpu_save); } else { err |= __put_user(0, &sf->fpu_save); } + if (wsaved) { + __siginfo_rwin_t __user *rwp = tail; + tail += sizeof(*rwp); + err |= save_rwin_state(wsaved, rwp); + err |= __put_user(rwp, &sf->rwin_save); + } else { + err |= __put_user(0, &sf->rwin_save); + } err |= __put_user(oldset->sig[0], &sf->info.si_mask); err |= __copy_to_user(sf->extramask, &oldset->sig[1], (_NSIG_WORDS - 1) * sizeof(unsigned int)); - err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP], - sizeof(struct reg_window32)); + if (!wsaved) { + err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP], + sizeof(struct reg_window32)); + } else { + struct reg_window32 *rp; + + rp = ¤t_thread_info()->reg_window[wsaved - 1]; + err |= __copy_to_user(sf, rp, sizeof(struct reg_window32)); + } if (err) goto sigsegv; @@ -399,21 +365,24 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, int signo, sigset_t *oldset, siginfo_t *info) { struct rt_signal_frame __user *sf; - int sigframe_size; + int sigframe_size, wsaved; + void __user *tail; unsigned int psr; int err; synchronize_user_stack(); - sigframe_size = RT_ALIGNEDSZ; - if (!used_math()) - sigframe_size -= sizeof(__siginfo_fpu_t); + wsaved = current_thread_info()->w_saved; + sigframe_size = sizeof(*sf); + if (used_math()) + sigframe_size += sizeof(__siginfo_fpu_t); + if (wsaved) + sigframe_size += sizeof(__siginfo_rwin_t); sf = (struct rt_signal_frame __user *) get_sigframe(&ka->sa, regs, sigframe_size); if (invalid_frame_pointer(sf, sigframe_size)) goto sigill; - if (current_thread_info()->w_saved != 0) - goto sigill; + tail = sf + 1; err = __put_user(regs->pc, &sf->regs.pc); err |= __put_user(regs->npc, &sf->regs.npc); err |= __put_user(regs->y, &sf->regs.y); @@ -425,11 +394,21 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, err |= __put_user(0, &sf->extra_size); if (psr & PSR_EF) { - err |= save_fpu_state(regs, &sf->fpu_state); - err |= __put_user(&sf->fpu_state, &sf->fpu_save); + __siginfo_fpu_t *fp = tail; + tail += sizeof(*fp); + err |= save_fpu_state(regs, fp); + err |= __put_user(fp, &sf->fpu_save); } else { err |= __put_user(0, &sf->fpu_save); } + if (wsaved) { + __siginfo_rwin_t *rwp = tail; + tail += sizeof(*rwp); + err |= save_rwin_state(wsaved, rwp); + err |= __put_user(rwp, &sf->rwin_save); + } else { + err |= __put_user(0, &sf->rwin_save); + } err |= __copy_to_user(&sf->mask, &oldset->sig[0], sizeof(sigset_t)); /* Setup sigaltstack */ @@ -437,8 +416,15 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, err |= __put_user(sas_ss_flags(regs->u_regs[UREG_FP]), &sf->stack.ss_flags); err |= __put_user(current->sas_ss_size, &sf->stack.ss_size); - err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP], - sizeof(struct reg_window32)); + if (!wsaved) { + err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP], + sizeof(struct reg_window32)); + } else { + struct reg_window32 *rp; + + rp = ¤t_thread_info()->reg_window[wsaved - 1]; + err |= __copy_to_user(sf, rp, sizeof(struct reg_window32)); + } err |= copy_siginfo_to_user(&sf->info, info); diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index 006fe45..47509df 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -34,6 +34,7 @@ #include "entry.h" #include "systbls.h" +#include "sigutil.h" #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -236,7 +237,7 @@ struct rt_signal_frame { __siginfo_fpu_t __user *fpu_save; stack_t stack; sigset_t mask; - __siginfo_fpu_t fpu_state; + __siginfo_rwin_t *rwin_save; }; static long _sigpause_common(old_sigset_t set) @@ -266,33 +267,12 @@ asmlinkage long sys_sigsuspend(old_sigset_t set) return _sigpause_common(set); } -static inline int -restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) -{ - unsigned long *fpregs = current_thread_info()->fpregs; - unsigned long fprs; - int err; - - err = __get_user(fprs, &fpu->si_fprs); - fprs_write(0); - regs->tstate &= ~TSTATE_PEF; - if (fprs & FPRS_DL) - err |= copy_from_user(fpregs, &fpu->si_float_regs[0], - (sizeof(unsigned int) * 32)); - if (fprs & FPRS_DU) - err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32], - (sizeof(unsigned int) * 32)); - err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr); - err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr); - current_thread_info()->fpsaved[0] |= fprs; - return err; -} - void do_rt_sigreturn(struct pt_regs *regs) { struct rt_signal_frame __user *sf; unsigned long tpc, tnpc, tstate; __siginfo_fpu_t __user *fpu_save; + __siginfo_rwin_t __user *rwin_save; sigset_t set; int err; @@ -325,8 +305,8 @@ void do_rt_sigreturn(struct pt_regs *regs) regs->tstate |= (tstate & (TSTATE_ASI | TSTATE_ICC | TSTATE_XCC)); err |= __get_user(fpu_save, &sf->fpu_save); - if (fpu_save) - err |= restore_fpu_state(regs, &sf->fpu_state); + if (!err && fpu_save) + err |= restore_fpu_state(regs, fpu_save); err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t)); err |= do_sigaltstack(&sf->stack, NULL, (unsigned long)sf); @@ -334,6 +314,12 @@ void do_rt_sigreturn(struct pt_regs *regs) if (err) goto segv; + err |= __get_user(rwin_save, &sf->rwin_save); + if (!err && rwin_save) { + if (restore_rwin_state(rwin_save)) + goto segv; + } + regs->tpc = tpc; regs->tnpc = tnpc; @@ -351,34 +337,13 @@ segv: } /* Checks if the fp is valid */ -static int invalid_frame_pointer(void __user *fp, int fplen) +static int invalid_frame_pointer(void __user *fp) { if (((unsigned long) fp) & 15) return 1; return 0; } -static inline int -save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) -{ - unsigned long *fpregs = current_thread_info()->fpregs; - unsigned long fprs; - int err = 0; - - fprs = current_thread_info()->fpsaved[0]; - if (fprs & FPRS_DL) - err |= copy_to_user(&fpu->si_float_regs[0], fpregs, - (sizeof(unsigned int) * 32)); - if (fprs & FPRS_DU) - err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16, - (sizeof(unsigned int) * 32)); - err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr); - err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr); - err |= __put_user(fprs, &fpu->si_fprs); - - return err; -} - static inline void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, unsigned long framesize) { unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS; @@ -414,34 +379,48 @@ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, int signo, sigset_t *oldset, siginfo_t *info) { struct rt_signal_frame __user *sf; - int sigframe_size, err; + int wsaved, err, sf_size; + void __user *tail; /* 1. Make sure everything is clean */ synchronize_user_stack(); save_and_clear_fpu(); - sigframe_size = sizeof(struct rt_signal_frame); - if (!(current_thread_info()->fpsaved[0] & FPRS_FEF)) - sigframe_size -= sizeof(__siginfo_fpu_t); + wsaved = get_thread_wsaved(); + sf_size = sizeof(struct rt_signal_frame); + if (current_thread_info()->fpsaved[0] & FPRS_FEF) + sf_size += sizeof(__siginfo_fpu_t); + if (wsaved) + sf_size += sizeof(__siginfo_rwin_t); sf = (struct rt_signal_frame __user *) - get_sigframe(ka, regs, sigframe_size); - - if (invalid_frame_pointer (sf, sigframe_size)) - goto sigill; + get_sigframe(ka, regs, sf_size); - if (get_thread_wsaved() != 0) + if (invalid_frame_pointer (sf)) goto sigill; + tail = (sf + 1); + /* 2. Save the current process state */ err = copy_to_user(&sf->regs, regs, sizeof (*regs)); if (current_thread_info()->fpsaved[0] & FPRS_FEF) { - err |= save_fpu_state(regs, &sf->fpu_state); - err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save); + __siginfo_fpu_t __user *fpu_save = tail; + tail += sizeof(__siginfo_fpu_t); + err |= save_fpu_state(regs, fpu_save); + err |= __put_user((u64)fpu_save, &sf->fpu_save); } else { err |= __put_user(0, &sf->fpu_save); } + if (wsaved) { + __siginfo_rwin_t __user *rwin_save = tail; + tail += sizeof(__siginfo_rwin_t); + err |= save_rwin_state(wsaved, rwin_save); + err |= __put_user((u64)rwin_save, &sf->rwin_save); + set_thread_wsaved(0); + } else { + err |= __put_user(0, &sf->rwin_save); + } /* Setup sigaltstack */ err |= __put_user(current->sas_ss_sp, &sf->stack.ss_sp); @@ -450,10 +429,17 @@ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, err |= copy_to_user(&sf->mask, oldset, sizeof(sigset_t)); - err |= copy_in_user((u64 __user *)sf, - (u64 __user *)(regs->u_regs[UREG_FP]+STACK_BIAS), - sizeof(struct reg_window)); + if (!wsaved) { + err |= copy_in_user((u64 __user *)sf, + (u64 __user *)(regs->u_regs[UREG_FP] + + STACK_BIAS), + sizeof(struct reg_window)); + } else { + struct reg_window *rp; + rp = ¤t_thread_info()->reg_window[wsaved - 1]; + err |= copy_to_user(sf, rp, sizeof(struct reg_window)); + } if (info) err |= copy_siginfo_to_user(&sf->info, info); else { diff --git a/arch/sparc/kernel/sigutil.h b/arch/sparc/kernel/sigutil.h new file mode 100644 index 0000000..d223aa4 --- /dev/null +++ b/arch/sparc/kernel/sigutil.h @@ -0,0 +1,9 @@ +#ifndef _SIGUTIL_H +#define _SIGUTIL_H + +int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu); +int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu); +int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin); +int restore_rwin_state(__siginfo_rwin_t __user *rp); + +#endif /* _SIGUTIL_H */ diff --git a/arch/sparc/kernel/sigutil_32.c b/arch/sparc/kernel/sigutil_32.c new file mode 100644 index 0000000..35c7897 --- /dev/null +++ b/arch/sparc/kernel/sigutil_32.c @@ -0,0 +1,120 @@ +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "sigutil.h" + +int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) +{ + int err = 0; +#ifdef CONFIG_SMP + if (test_tsk_thread_flag(current, TIF_USEDFPU)) { + put_psr(get_psr() | PSR_EF); + fpsave(¤t->thread.float_regs[0], ¤t->thread.fsr, + ¤t->thread.fpqueue[0], ¤t->thread.fpqdepth); + regs->psr &= ~(PSR_EF); + clear_tsk_thread_flag(current, TIF_USEDFPU); + } +#else + if (current == last_task_used_math) { + put_psr(get_psr() | PSR_EF); + fpsave(¤t->thread.float_regs[0], ¤t->thread.fsr, + ¤t->thread.fpqueue[0], ¤t->thread.fpqdepth); + last_task_used_math = NULL; + regs->psr &= ~(PSR_EF); + } +#endif + err |= __copy_to_user(&fpu->si_float_regs[0], + ¤t->thread.float_regs[0], + (sizeof(unsigned long) * 32)); + err |= __put_user(current->thread.fsr, &fpu->si_fsr); + err |= __put_user(current->thread.fpqdepth, &fpu->si_fpqdepth); + if (current->thread.fpqdepth != 0) + err |= __copy_to_user(&fpu->si_fpqueue[0], + ¤t->thread.fpqueue[0], + ((sizeof(unsigned long) + + (sizeof(unsigned long *)))*16)); + clear_used_math(); + return err; +} + +int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) +{ + int err; +#ifdef CONFIG_SMP + if (test_tsk_thread_flag(current, TIF_USEDFPU)) + regs->psr &= ~PSR_EF; +#else + if (current == last_task_used_math) { + last_task_used_math = NULL; + regs->psr &= ~PSR_EF; + } +#endif + set_used_math(); + clear_tsk_thread_flag(current, TIF_USEDFPU); + + if (!access_ok(VERIFY_READ, fpu, sizeof(*fpu))) + return -EFAULT; + + err = __copy_from_user(¤t->thread.float_regs[0], &fpu->si_float_regs[0], + (sizeof(unsigned long) * 32)); + err |= __get_user(current->thread.fsr, &fpu->si_fsr); + err |= __get_user(current->thread.fpqdepth, &fpu->si_fpqdepth); + if (current->thread.fpqdepth != 0) + err |= __copy_from_user(¤t->thread.fpqueue[0], + &fpu->si_fpqueue[0], + ((sizeof(unsigned long) + + (sizeof(unsigned long *)))*16)); + return err; +} + +int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin) +{ + int i, err = __put_user(wsaved, &rwin->wsaved); + + for (i = 0; i < wsaved; i++) { + struct reg_window32 *rp; + unsigned long fp; + + rp = ¤t_thread_info()->reg_window[i]; + fp = current_thread_info()->rwbuf_stkptrs[i]; + err |= copy_to_user(&rwin->reg_window[i], rp, + sizeof(struct reg_window32)); + err |= __put_user(fp, &rwin->rwbuf_stkptrs[i]); + } + return err; +} + +int restore_rwin_state(__siginfo_rwin_t __user *rp) +{ + struct thread_info *t = current_thread_info(); + int i, wsaved, err; + + __get_user(wsaved, &rp->wsaved); + if (wsaved > NSWINS) + return -EFAULT; + + err = 0; + for (i = 0; i < wsaved; i++) { + err |= copy_from_user(&t->reg_window[i], + &rp->reg_window[i], + sizeof(struct reg_window32)); + err |= __get_user(t->rwbuf_stkptrs[i], + &rp->rwbuf_stkptrs[i]); + } + if (err) + return err; + + t->w_saved = wsaved; + synchronize_user_stack(); + if (t->w_saved) + return -EFAULT; + return 0; + +} diff --git a/arch/sparc/kernel/sigutil_64.c b/arch/sparc/kernel/sigutil_64.c new file mode 100644 index 0000000..e7dc508 --- /dev/null +++ b/arch/sparc/kernel/sigutil_64.c @@ -0,0 +1,93 @@ +#include +#include +#include +#include + +#include +#include +#include + +#include "sigutil.h" + +int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) +{ + unsigned long *fpregs = current_thread_info()->fpregs; + unsigned long fprs; + int err = 0; + + fprs = current_thread_info()->fpsaved[0]; + if (fprs & FPRS_DL) + err |= copy_to_user(&fpu->si_float_regs[0], fpregs, + (sizeof(unsigned int) * 32)); + if (fprs & FPRS_DU) + err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16, + (sizeof(unsigned int) * 32)); + err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr); + err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr); + err |= __put_user(fprs, &fpu->si_fprs); + + return err; +} + +int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) +{ + unsigned long *fpregs = current_thread_info()->fpregs; + unsigned long fprs; + int err; + + err = __get_user(fprs, &fpu->si_fprs); + fprs_write(0); + regs->tstate &= ~TSTATE_PEF; + if (fprs & FPRS_DL) + err |= copy_from_user(fpregs, &fpu->si_float_regs[0], + (sizeof(unsigned int) * 32)); + if (fprs & FPRS_DU) + err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32], + (sizeof(unsigned int) * 32)); + err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr); + err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr); + current_thread_info()->fpsaved[0] |= fprs; + return err; +} + +int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin) +{ + int i, err = __put_user(wsaved, &rwin->wsaved); + + for (i = 0; i < wsaved; i++) { + struct reg_window *rp = ¤t_thread_info()->reg_window[i]; + unsigned long fp = current_thread_info()->rwbuf_stkptrs[i]; + + err |= copy_to_user(&rwin->reg_window[i], rp, + sizeof(struct reg_window)); + err |= __put_user(fp, &rwin->rwbuf_stkptrs[i]); + } + return err; +} + +int restore_rwin_state(__siginfo_rwin_t __user *rp) +{ + struct thread_info *t = current_thread_info(); + int i, wsaved, err; + + __get_user(wsaved, &rp->wsaved); + if (wsaved > NSWINS) + return -EFAULT; + + err = 0; + for (i = 0; i < wsaved; i++) { + err |= copy_from_user(&t->reg_window[i], + &rp->reg_window[i], + sizeof(struct reg_window)); + err |= __get_user(t->rwbuf_stkptrs[i], + &rp->rwbuf_stkptrs[i]); + } + if (err) + return err; + + set_thread_wsaved(wsaved); + synchronize_user_stack(); + if (get_thread_wsaved()) + return -EFAULT; + return 0; +} -- cgit v1.1 From 47c08f3107270e5a439bc0106a308f7c48c9621d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 20 Aug 2011 11:49:43 -0700 Subject: pci: fix new kernel-doc warning in pci.c Fix new kernel-doc warning in pci.c: Warning(drivers/pci/pci.c:3259): No description found for parameter 'mps' Warning(drivers/pci/pci.c:3259): Excess function parameter 'rq' description in 'pcie_set_mps' Signed-off-by: Randy Dunlap Cc: Jesse Barnes Signed-off-by: Linus Torvalds --- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 466fad6..0ce6742 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3250,7 +3250,7 @@ int pcie_get_mps(struct pci_dev *dev) /** * pcie_set_mps - set PCI Express maximum payload size * @dev: PCI device to query - * @rq: maximum payload size in bytes + * @mps: maximum payload size in bytes * valid values are 128, 256, 512, 1024, 2048, 4096 * * If possible sets maximum payload size -- cgit v1.1 From 450a37d2eca6ddf6ea8186f57a7531318df6e796 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Sun, 21 Aug 2011 00:28:56 -0600 Subject: OMAP4: clock: fix compile warning Fix the following compile warning: arch/arm/mach-omap2/clock44xx_data.c: In function 'omap4xxx_clk_init': arch/arm/mach-omap2/clock44xx_data.c:3371:6: warning: 'cpu_clkflg' may be used uninitialized in this function The approach taken here is intended to work if omap4xxx_clk_init() is converted into an initcall. Thanks to Bjarne Steinsbo for proposing another approach. Signed-off-by: Paul Walmsley Cc: Bjarne Steinsbo --- arch/arm/mach-omap2/clock44xx_data.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-omap2/clock44xx_data.c b/arch/arm/mach-omap2/clock44xx_data.c index 2af0e3f0..4304768 100644 --- a/arch/arm/mach-omap2/clock44xx_data.c +++ b/arch/arm/mach-omap2/clock44xx_data.c @@ -3376,6 +3376,8 @@ int __init omap4xxx_clk_init(void) } else if (cpu_is_omap446x()) { cpu_mask = RATE_IN_4460; cpu_clkflg = CK_446X; + } else { + return 0; } clk_init(&omap2_clk_functions); -- cgit v1.1 From 6719db6a23d4b7f1e5052eedae394135e3aef9c1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Sat, 20 Aug 2011 08:29:51 -0400 Subject: Btrfs: fix 64 bit divide problem This fixes a regression introduced by commit cdcb725c05fe ("Btrfs: check if there is enough space for balancing smarter"). We can't do 64-bit divides on 32-bit architectures. In cases where we need to divide/multiply by 2 we should just left/right shift respectively, and in cases where theres N number of devices use do_div. Also make the counters u64 to match up with rw_devices. Thanks, Signed-off-by: Josef Bacik Acked-and-tested-by: Ingo Molnar Signed-off-by: Linus Torvalds --- fs/btrfs/extent-tree.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 80d6148..f5be06a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6735,9 +6735,9 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; struct btrfs_device *device; u64 min_free; + u64 dev_min = 1; + u64 dev_nr = 0; int index; - int dev_nr = 0; - int dev_min = 1; int full = 0; int ret = 0; @@ -6796,14 +6796,16 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) index = get_block_group_index(block_group); if (index == 0) { dev_min = 4; - min_free /= 2; + /* Divide by 2 */ + min_free >>= 1; } else if (index == 1) { dev_min = 2; } else if (index == 2) { - min_free *= 2; + /* Multiply by 2 */ + min_free <<= 1; } else if (index == 3) { dev_min = fs_devices->rw_devices; - min_free /= dev_min; + do_div(min_free, dev_min); } mutex_lock(&root->fs_info->chunk_mutex); -- cgit v1.1 From 2782a35132339574b06ce30556eb9f97eb1d26cd Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 21 Aug 2011 12:48:04 -0700 Subject: Input: tnetv107x-ts - add missing include of linux/module.h tnetv107x-ts.c uses interfaces from linux/module.h, so it should include that file. This patch fixes build errors. Signed-off-by: Axel Lin Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/tnetv107x-ts.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/touchscreen/tnetv107x-ts.c b/drivers/input/touchscreen/tnetv107x-ts.c index 089b0a0..0e8f63e 100644 --- a/drivers/input/touchscreen/tnetv107x-ts.c +++ b/drivers/input/touchscreen/tnetv107x-ts.c @@ -13,6 +13,7 @@ * GNU General Public License for more details. */ +#include #include #include #include -- cgit v1.1 From b9cc510b395543cb7dba89c76421d23ed9e85f95 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 21 Aug 2011 12:48:08 -0700 Subject: Input: ep93xx_keypad - add missing include of linux/module.h ep93xx_keypad.c uses interfaces from linux/module.h, so it should include that file. This patch fixes build errors. Signed-off-by: Axel Lin Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/ep93xx_keypad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/keyboard/ep93xx_keypad.c b/drivers/input/keyboard/ep93xx_keypad.c index c8242dd..aa17e02 100644 --- a/drivers/input/keyboard/ep93xx_keypad.c +++ b/drivers/input/keyboard/ep93xx_keypad.c @@ -20,6 +20,7 @@ * flag. */ +#include #include #include #include -- cgit v1.1 From ffb57c4b8612c31204b06713770f6df4b8a94e4f Mon Sep 17 00:00:00 2001 From: Jay Estabrook Date: Wed, 6 Jul 2011 23:57:13 +0000 Subject: drm/radeon/alpha: Add Alpha support to Radeon DRM code Alpha needs to have available the system bus address for the Radeon's local memory, so that it can be used in ttm_bo_vm_fault(), when building the PTEs for accessing that VRAM. So, we make bus.addr hold the ioremap() return, and then we can modify bus.base appropriately for use during page fault processing. Signed-off-by: Jay Estabrook Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_ttm.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 60125dd..9b86fb0 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -450,6 +450,29 @@ static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_ return -EINVAL; mem->bus.base = rdev->mc.aper_base; mem->bus.is_iomem = true; +#ifdef __alpha__ + /* + * Alpha: use bus.addr to hold the ioremap() return, + * so we can modify bus.base below. + */ + if (mem->placement & TTM_PL_FLAG_WC) + mem->bus.addr = + ioremap_wc(mem->bus.base + mem->bus.offset, + mem->bus.size); + else + mem->bus.addr = + ioremap_nocache(mem->bus.base + mem->bus.offset, + mem->bus.size); + + /* + * Alpha: Use just the bus offset plus + * the hose/domain memory base for bus.base. + * It then can be used to build PTEs for VRAM + * access, as done in ttm_bo_vm_fault(). + */ + mem->bus.base = (mem->bus.base & 0x0ffffffffUL) + + rdev->ddev->hose->dense_mem_base; +#endif break; default: return -EINVAL; -- cgit v1.1 From 24cae9e7c9537fd6a16bc2f5ec398ee4bef5d007 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Fri, 19 Aug 2011 15:24:16 +0000 Subject: drm/radeon: Take IH ring into account for test size calculation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michel Dänzer Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_test.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index dee4a0c..1ebd0fe 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -40,10 +40,14 @@ void radeon_test_moves(struct radeon_device *rdev) size = 1024 * 1024; /* Number of tests = - * (Total GTT - IB pool - writeback page - ring buffer) / test size + * (Total GTT - IB pool - writeback page - ring buffers) / test size */ - n = ((u32)(rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024 - RADEON_GPU_PAGE_SIZE - - rdev->cp.ring_size)) / size; + n = rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024 - rdev->cp.ring_size; + if (rdev->wb.wb_obj) + n -= RADEON_GPU_PAGE_SIZE; + if (rdev->ih.ring_obj) + n -= rdev->ih.ring_size; + n /= size; gtt_obj = kzalloc(n * sizeof(*gtt_obj), GFP_KERNEL); if (!gtt_obj) { -- cgit v1.1 From 4fb1a35c0185f8fa3e71b12de62b8752a9a9ed0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Fri, 19 Aug 2011 15:24:17 +0000 Subject: drm/radeon: Explicitly print GTT/VRAM offsets on test failure. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise these would need to be painstakingly calculated looking at the source code. Signed-off-by: Michel Dänzer Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_test.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index 1ebd0fe..602fa35 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -136,9 +136,15 @@ void radeon_test_moves(struct radeon_device *rdev) gtt_start++, vram_start++) { if (*vram_start != gtt_start) { DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, " - "expected 0x%p (GTT map 0x%p-0x%p)\n", - i, *vram_start, gtt_start, gtt_map, - gtt_end); + "expected 0x%p (GTT/VRAM offset " + "0x%16llx/0x%16llx)\n", + i, *vram_start, gtt_start, + (unsigned long long) + (gtt_addr - rdev->mc.gtt_start + + (void*)gtt_start - gtt_map), + (unsigned long long) + (vram_addr - rdev->mc.vram_start + + (void*)gtt_start - gtt_map)); radeon_bo_kunmap(vram_obj); goto out_cleanup; } @@ -179,9 +185,15 @@ void radeon_test_moves(struct radeon_device *rdev) gtt_start++, vram_start++) { if (*gtt_start != vram_start) { DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, " - "expected 0x%p (VRAM map 0x%p-0x%p)\n", - i, *gtt_start, vram_start, vram_map, - vram_end); + "expected 0x%p (VRAM/GTT offset " + "0x%16llx/0x%16llx)\n", + i, *gtt_start, vram_start, + (unsigned long long) + (vram_addr - rdev->mc.vram_start + + (void*)vram_start - vram_map), + (unsigned long long) + (gtt_addr - rdev->mc.gtt_start + + (void*)vram_start - vram_map)); radeon_bo_kunmap(gtt_obj[i]); goto out_cleanup; } -- cgit v1.1 From ba95c45a78d57ac05bf45d81b92a6ec4d299695d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Fri, 19 Aug 2011 15:24:18 +0000 Subject: drm/radeon: Make vramlimit parameter actually work. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michel Dänzer Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index a3b011b..b51e157 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -301,6 +301,8 @@ void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64 mc->mc_vram_size = mc->aper_size; } mc->vram_end = mc->vram_start + mc->mc_vram_size - 1; + if (radeon_vram_limit && radeon_vram_limit < mc->real_vram_size) + mc->real_vram_size = radeon_vram_limit; dev_info(rdev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n", mc->mc_vram_size >> 20, mc->vram_start, mc->vram_end, mc->real_vram_size >> 20); -- cgit v1.1 From 4f41adfd8ce9ff84fa9e968e0fe2854e9bc6fcb0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 20 Aug 2011 10:23:38 +0100 Subject: ASoC: WM8996 record paths need AIFCLK Make AIFCLK supply the record paths otherwise record will not work unless there is a simultaneous playback. Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- sound/soc/codecs/wm8996.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/codecs/wm8996.c b/sound/soc/codecs/wm8996.c index ab8e9d1..85f43b6 100644 --- a/sound/soc/codecs/wm8996.c +++ b/sound/soc/codecs/wm8996.c @@ -1213,6 +1213,16 @@ static const struct snd_soc_dapm_route wm8996_dapm_routes[] = { { "AIF2RX0", NULL, "AIFCLK" }, { "AIF2RX1", NULL, "AIFCLK" }, + { "AIF1TX0", NULL, "AIFCLK" }, + { "AIF1TX1", NULL, "AIFCLK" }, + { "AIF1TX2", NULL, "AIFCLK" }, + { "AIF1TX3", NULL, "AIFCLK" }, + { "AIF1TX4", NULL, "AIFCLK" }, + { "AIF1TX5", NULL, "AIFCLK" }, + + { "AIF2TX0", NULL, "AIFCLK" }, + { "AIF2TX1", NULL, "AIFCLK" }, + { "DSP1RXL", NULL, "SYSDSPCLK" }, { "DSP1RXR", NULL, "SYSDSPCLK" }, { "DSP2RXL", NULL, "SYSDSPCLK" }, -- cgit v1.1 From 7691cd74c5d6b173e1483277fb70d7798e97d2fa Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 20 Aug 2011 16:59:27 +0100 Subject: ASoC: Fix configuration of WM8996 input enables There's no need for separate widgets for the enables (as the map already shows). Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- sound/soc/codecs/wm8996.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/wm8996.c b/sound/soc/codecs/wm8996.c index 85f43b6..e76d4ed 100644 --- a/sound/soc/codecs/wm8996.c +++ b/sound/soc/codecs/wm8996.c @@ -988,15 +988,10 @@ SND_SOC_DAPM_MICBIAS("MICB1", WM8996_POWER_MANAGEMENT_1, 8, 0), SND_SOC_DAPM_PGA("IN1L PGA", WM8996_POWER_MANAGEMENT_2, 5, 0, NULL, 0), SND_SOC_DAPM_PGA("IN1R PGA", WM8996_POWER_MANAGEMENT_2, 4, 0, NULL, 0), -SND_SOC_DAPM_MUX("IN1L Mux", SND_SOC_NOPM, 0, 0, &in1_mux), -SND_SOC_DAPM_MUX("IN1R Mux", SND_SOC_NOPM, 0, 0, &in1_mux), -SND_SOC_DAPM_MUX("IN2L Mux", SND_SOC_NOPM, 0, 0, &in2_mux), -SND_SOC_DAPM_MUX("IN2R Mux", SND_SOC_NOPM, 0, 0, &in2_mux), - -SND_SOC_DAPM_PGA("IN1L", WM8996_POWER_MANAGEMENT_7, 2, 0, NULL, 0), -SND_SOC_DAPM_PGA("IN1R", WM8996_POWER_MANAGEMENT_7, 3, 0, NULL, 0), -SND_SOC_DAPM_PGA("IN2L", WM8996_POWER_MANAGEMENT_7, 6, 0, NULL, 0), -SND_SOC_DAPM_PGA("IN2R", WM8996_POWER_MANAGEMENT_7, 7, 0, NULL, 0), +SND_SOC_DAPM_MUX("IN1L Mux", WM8996_POWER_MANAGEMENT_7, 2, 0, &in1_mux), +SND_SOC_DAPM_MUX("IN1R Mux", WM8996_POWER_MANAGEMENT_7, 3, 0, &in1_mux), +SND_SOC_DAPM_MUX("IN2L Mux", WM8996_POWER_MANAGEMENT_7, 6, 0, &in2_mux), +SND_SOC_DAPM_MUX("IN2R Mux", WM8996_POWER_MANAGEMENT_7, 7, 0, &in2_mux), SND_SOC_DAPM_SUPPLY("DMIC2", WM8996_POWER_MANAGEMENT_7, 9, 0, NULL, 0), SND_SOC_DAPM_SUPPLY("DMIC1", WM8996_POWER_MANAGEMENT_7, 8, 0, NULL, 0), -- cgit v1.1 From f79e7ff85223e054c2967820d3be1c125a903bd4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 21 Aug 2011 12:20:00 +0100 Subject: ASoC: Ensure we only run Speyside WM8962 bias level callbacks once We get called once per DAPM context but only need to run once. When DAPM was serialized this was a series of noops but now it can run in parallel we need to take proper care. Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- sound/soc/samsung/speyside_wm8962.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/samsung/speyside_wm8962.c b/sound/soc/samsung/speyside_wm8962.c index 0b9eb5f..72535f2 100644 --- a/sound/soc/samsung/speyside_wm8962.c +++ b/sound/soc/samsung/speyside_wm8962.c @@ -23,6 +23,9 @@ static int speyside_wm8962_set_bias_level(struct snd_soc_card *card, struct snd_soc_dai *codec_dai = card->rtd[0].codec_dai; int ret; + if (dapm->dev != codec_dai->dev) + return 0; + switch (level) { case SND_SOC_BIAS_PREPARE: if (dapm->bias_level == SND_SOC_BIAS_STANDBY) { @@ -57,6 +60,9 @@ static int speyside_wm8962_set_bias_level_post(struct snd_soc_card *card, struct snd_soc_dai *codec_dai = card->rtd[0].codec_dai; int ret; + if (dapm->dev != codec_dai->dev) + return 0; + switch (level) { case SND_SOC_BIAS_STANDBY: ret = snd_soc_dai_set_sysclk(codec_dai, WM8962_SYSCLK_MCLK, -- cgit v1.1 From 4df0cb2fa977f99963b616487a22ebd021ea5463 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 21 Aug 2011 17:18:52 +0100 Subject: ASoC: Clear any outstanding WM8962 FLL lock completions before waiting Ensure that we don't spuriously trigger early. Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- sound/soc/codecs/wm8962.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 28650ed..1725550 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2221,6 +2221,8 @@ static int sysclk_event(struct snd_soc_dapm_widget *w, switch (event) { case SND_SOC_DAPM_PRE_PMU: if (fll) { + try_wait_for_completion(&wm8962->fll_lock); + snd_soc_update_bits(codec, WM8962_FLL_CONTROL_1, WM8962_FLL_ENA, WM8962_FLL_ENA); if (wm8962->irq) { @@ -3284,6 +3286,8 @@ static int wm8962_set_fll(struct snd_soc_codec *codec, int fll_id, int source, snd_soc_write(codec, WM8962_FLL_CONTROL_7, fll_div.lambda); snd_soc_write(codec, WM8962_FLL_CONTROL_8, fll_div.n); + try_wait_for_completion(&wm8962->fll_lock); + snd_soc_update_bits(codec, WM8962_FLL_CONTROL_1, WM8962_FLL_FRAC | WM8962_FLL_REFCLK_SRC_MASK | WM8962_FLL_ENA, fll1); -- cgit v1.1 From a41619455c0e28b6973471e87f1702c6129d3439 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 16 Aug 2011 16:57:58 +0900 Subject: ASoC: Clear completions from late WM8996 FLL lock IRQs In case we have a pending completion, for example due to a problem with the input clock which got corrected after we timed out. Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- sound/soc/codecs/wm8996.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/wm8996.c b/sound/soc/codecs/wm8996.c index e76d4ed..0936ae5 100644 --- a/sound/soc/codecs/wm8996.c +++ b/sound/soc/codecs/wm8996.c @@ -2111,6 +2111,9 @@ static int wm8996_set_fll(struct snd_soc_codec *codec, int fll_id, int source, snd_soc_write(codec, WM8996_FLL_EFS_1, fll_div.lambda); + /* Clear any pending completions (eg, from failed startups) */ + try_wait_for_completion(&wm8996->fll_lock); + snd_soc_update_bits(codec, WM8996_FLL_CONTROL_1, WM8996_FLL_ENA, WM8996_FLL_ENA); -- cgit v1.1 From 0d6cfa3a75f5cde5b3ca0dde748fd22625b4f34c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20B=C3=A9nard?= Date: Mon, 22 Aug 2011 15:41:46 +0100 Subject: ARM: 7051/1: cpuimx* boards: fix mach-types errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I made some changes to the entry in the ARM Machine Registry after submission which was the wrong thing to do. This patch should help to fix this error. Signed-off-by: Eric Bénard Signed-off-by: Russell King --- arch/arm/mach-imx/mach-cpuimx27.c | 2 +- arch/arm/mach-imx/mach-cpuimx35.c | 2 +- arch/arm/mach-imx/mach-eukrea_cpuimx25.c | 2 +- arch/arm/tools/mach-types | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-imx/mach-cpuimx27.c b/arch/arm/mach-imx/mach-cpuimx27.c index 87887ac..f851fe9 100644 --- a/arch/arm/mach-imx/mach-cpuimx27.c +++ b/arch/arm/mach-imx/mach-cpuimx27.c @@ -310,7 +310,7 @@ static struct sys_timer eukrea_cpuimx27_timer = { .init = eukrea_cpuimx27_timer_init, }; -MACHINE_START(CPUIMX27, "EUKREA CPUIMX27") +MACHINE_START(EUKREA_CPUIMX27, "EUKREA CPUIMX27") .boot_params = MX27_PHYS_OFFSET + 0x100, .map_io = mx27_map_io, .init_early = imx27_init_early, diff --git a/arch/arm/mach-imx/mach-cpuimx35.c b/arch/arm/mach-imx/mach-cpuimx35.c index f39a478b..4bd083b 100644 --- a/arch/arm/mach-imx/mach-cpuimx35.c +++ b/arch/arm/mach-imx/mach-cpuimx35.c @@ -192,7 +192,7 @@ struct sys_timer eukrea_cpuimx35_timer = { .init = eukrea_cpuimx35_timer_init, }; -MACHINE_START(EUKREA_CPUIMX35, "Eukrea CPUIMX35") +MACHINE_START(EUKREA_CPUIMX35SD, "Eukrea CPUIMX35") /* Maintainer: Eukrea Electromatique */ .boot_params = MX3x_PHYS_OFFSET + 0x100, .map_io = mx35_map_io, diff --git a/arch/arm/mach-imx/mach-eukrea_cpuimx25.c b/arch/arm/mach-imx/mach-eukrea_cpuimx25.c index da36da5..2442d5d 100644 --- a/arch/arm/mach-imx/mach-eukrea_cpuimx25.c +++ b/arch/arm/mach-imx/mach-eukrea_cpuimx25.c @@ -161,7 +161,7 @@ static struct sys_timer eukrea_cpuimx25_timer = { .init = eukrea_cpuimx25_timer_init, }; -MACHINE_START(EUKREA_CPUIMX25, "Eukrea CPUIMX25") +MACHINE_START(EUKREA_CPUIMX25SD, "Eukrea CPUIMX25") /* Maintainer: Eukrea Electromatique */ .boot_params = MX25_PHYS_OFFSET + 0x100, .map_io = mx25_map_io, diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types index fff68d0..62cc8f9 100644 --- a/arch/arm/tools/mach-types +++ b/arch/arm/tools/mach-types @@ -351,7 +351,7 @@ centro MACH_CENTRO CENTRO 1944 nokia_rx51 MACH_NOKIA_RX51 NOKIA_RX51 1955 omap_zoom2 MACH_OMAP_ZOOM2 OMAP_ZOOM2 1967 cpuat9260 MACH_CPUAT9260 CPUAT9260 1973 -eukrea_cpuimx27 MACH_CPUIMX27 CPUIMX27 1975 +eukrea_cpuimx27 MACH_EUKREA_CPUIMX27 EUKREA_CPUIMX27 1975 acs5k MACH_ACS5K ACS5K 1982 snapper_9260 MACH_SNAPPER_9260 SNAPPER_9260 1987 dsm320 MACH_DSM320 DSM320 1988 @@ -476,8 +476,8 @@ cns3420vb MACH_CNS3420VB CNS3420VB 2776 omap4_panda MACH_OMAP4_PANDA OMAP4_PANDA 2791 ti8168evm MACH_TI8168EVM TI8168EVM 2800 teton_bga MACH_TETON_BGA TETON_BGA 2816 -eukrea_cpuimx25sd MACH_EUKREA_CPUIMX25 EUKREA_CPUIMX25 2820 -eukrea_cpuimx35sd MACH_EUKREA_CPUIMX35 EUKREA_CPUIMX35 2821 +eukrea_cpuimx25sd MACH_EUKREA_CPUIMX25SD EUKREA_CPUIMX25SD 2820 +eukrea_cpuimx35sd MACH_EUKREA_CPUIMX35SD EUKREA_CPUIMX35SD 2821 eukrea_cpuimx51sd MACH_EUKREA_CPUIMX51SD EUKREA_CPUIMX51SD 2822 eukrea_cpuimx51 MACH_EUKREA_CPUIMX51 EUKREA_CPUIMX51 2823 smdkc210 MACH_SMDKC210 SMDKC210 2838 -- cgit v1.1 From 3c05c4bed4ccce3f22f6d7899b308faae24ad198 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 17 Aug 2011 15:15:00 +0200 Subject: xen: Do not enable PV IPIs when vector callback not present Fix regression for HVM case on older (<4.1.1) hypervisors caused by commit 99bbb3a84a99cd04ab16b998b20f01a72cfa9f4f Author: Stefano Stabellini Date: Thu Dec 2 17:55:10 2010 +0000 xen: PV on HVM: support PV spinlocks and IPIs This change replaced the SMP operations with event based handlers without taking into account that this only works when the hypervisor supports callback vectors. This causes unexplainable hangs early on boot for HVM guests with more than one CPU. BugLink: http://bugs.launchpad.net/bugs/791850 CC: stable@kernel.org Signed-off-by: Stefan Bader Signed-off-by: Stefano Stabellini Tested-and-Reported-by: Stefan Bader Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index b4533a8..e79dbb9 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -521,8 +521,6 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) native_smp_prepare_cpus(max_cpus); WARN_ON(xen_smp_intr_init(0)); - if (!xen_have_vector_callback) - return; xen_init_lock_cpu(0); xen_init_spinlocks(); } @@ -546,6 +544,8 @@ static void xen_hvm_cpu_die(unsigned int cpu) void __init xen_hvm_smp_init(void) { + if (!xen_have_vector_callback) + return; smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; smp_ops.smp_send_reschedule = xen_smp_send_reschedule; smp_ops.cpu_up = xen_hvm_cpu_up; -- cgit v1.1 From 60c5f08e154fd235056645e050f2cd5671b19125 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 11 Aug 2011 13:17:20 -0700 Subject: xen/tracing: Fix tracing config option properly Steven Rostedt says we should use CONFIG_EVENT_TRACING. Cc:Steven Rostedt Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 3326204..add2c2d 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -15,7 +15,7 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ grant-table.o suspend.o platform-pci-unplug.o \ p2m.o -obj-$(CONFIG_FTRACE) += trace.o +obj-$(CONFIG_EVENT_TRACING) += trace.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o -- cgit v1.1 From 6f5986bce558e64fe867bff600a2127a3cb0c006 Mon Sep 17 00:00:00 2001 From: Joe Jin Date: Mon, 15 Aug 2011 12:51:31 +0800 Subject: xen-blkback: Don't disconnect backend until state switched to XenbusStateClosed. When do block-attach/block-detach test with below steps, umount hangs in the guest. Furthermore shutdown ends up being stuck when umounting file-systems. 1. start guest. 2. attach new block device by xm block-attach in Dom0. 3. mount new disk in guest. 4. execute xm block-detach to detach the block device in dom0 until timeout 5. Any request to the disk will hung. Root cause: This issue is caused when setting backend device's state to 'XenbusStateClosing', which sends to the frontend the XenbusStateClosing notification. When frontend receives the notification it tries to release the disk in blkfront_closing(), but at that moment the disk is still in use by guest, so frontend refuses to close. Specifically it sets the disk state to XenbusStateClosing and sends the notification to backend - when backend receives the event, it disconnects the vbd from real device, and sets the vbd device state to XenbusStateClosing. The backend disconnects the real device/file, and any IO requests to the disk in guest will end up in ether, leaving disk DEAD and set to XenbusStateClosing. When the guest wants to disconnect the disk, umount will hang on blkif_release()->xlvbd_release_gendisk() as it is unable to send any IO to the disk, which prevents clean system shutdown. Solution: Don't disconnect backend until frontend state switched to XenbusStateClosed. Signed-off-by: Joe Jin Cc: Daniel Stodden Cc: Jens Axboe Cc: Annie Li Cc: Ian Campbell [v1: Modified description a bit] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/xenbus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 6cc0db1..7be3d0f 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -601,11 +601,11 @@ static void frontend_changed(struct xenbus_device *dev, break; case XenbusStateClosing: - xen_blkif_disconnect(be->blkif); xenbus_switch_state(dev, XenbusStateClosing); break; case XenbusStateClosed: + xen_blkif_disconnect(be->blkif); xenbus_switch_state(dev, XenbusStateClosed); if (xenbus_dev_is_online(dev)) break; -- cgit v1.1 From 1bc05b0ae6448b20d46076899e0cc12ad999e50e Mon Sep 17 00:00:00 2001 From: Joe Jin Date: Mon, 15 Aug 2011 12:57:07 +0800 Subject: xen-blkback: fixed indentation and comments This patch fixes belows: 1. Fix code style issue. 2. Fix incorrect functions name in comments. Signed-off-by: Joe Jin Cc: Jens Axboe Cc: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/common.h | 2 +- drivers/block/xen-blkback/xenbus.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 9e40b28..00c57c9 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -46,7 +46,7 @@ #define DRV_PFX "xen-blkback:" #define DPRINTK(fmt, args...) \ - pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \ + pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \ __func__, __LINE__, ##args) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 7be3d0f..a96cb5f 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -590,7 +590,7 @@ static void frontend_changed(struct xenbus_device *dev, /* * Enforce precondition before potential leak point. - * blkif_disconnect() is idempotent. + * xen_blkif_disconnect() is idempotent. */ xen_blkif_disconnect(be->blkif); @@ -611,7 +611,7 @@ static void frontend_changed(struct xenbus_device *dev, break; /* fall through if not online */ case XenbusStateUnknown: - /* implies blkif_disconnect() via blkback_remove() */ + /* implies xen_blkif_disconnect() via xen_blkbk_remove() */ device_unregister(&dev->dev); break; -- cgit v1.1 From 5b9063b19caaffe7135e1f9b8b22174ded0f586b Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Sun, 21 Aug 2011 21:04:12 -0700 Subject: Input: ad714xx-spi - force SPI bus into the default 8-bit mode Signed-off-by: Michael Hennerich Signed-off-by: Dmitry Torokhov --- drivers/input/misc/ad714x-spi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/misc/ad714x-spi.c b/drivers/input/misc/ad714x-spi.c index 4120dd5..da83ac9 100644 --- a/drivers/input/misc/ad714x-spi.c +++ b/drivers/input/misc/ad714x-spi.c @@ -54,6 +54,12 @@ static int ad714x_spi_write(struct device *dev, unsigned short reg, static int __devinit ad714x_spi_probe(struct spi_device *spi) { struct ad714x_chip *chip; + int err; + + spi->bits_per_word = 8; + err = spi_setup(spi); + if (err < 0) + return err; chip = ad714x_probe(&spi->dev, BUS_SPI, spi->irq, ad714x_spi_read, ad714x_spi_write); -- cgit v1.1 From 6337de2204be3b7b40825a1d30de30e514e8947b Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Sun, 21 Aug 2011 21:04:12 -0700 Subject: Input: ad714x - fix endianness issues Allow driver to be used on Big Endian boxes. Signed-off-by: Michael Hennerich Signed-off-by: Dmitry Torokhov --- drivers/input/misc/ad714x-i2c.c | 34 ++++++++++------------------------ drivers/input/misc/ad714x-spi.c | 24 +++++++++++++++--------- 2 files changed, 25 insertions(+), 33 deletions(-) diff --git a/drivers/input/misc/ad714x-i2c.c b/drivers/input/misc/ad714x-i2c.c index e21deb1..00a6a22 100644 --- a/drivers/input/misc/ad714x-i2c.c +++ b/drivers/input/misc/ad714x-i2c.c @@ -32,17 +32,12 @@ static int ad714x_i2c_write(struct device *dev, unsigned short reg, { struct i2c_client *client = to_i2c_client(dev); int ret = 0; - u8 *_reg = (u8 *)® - u8 *_data = (u8 *)&data; - - u8 tx[4] = { - _reg[1], - _reg[0], - _data[1], - _data[0] + unsigned short tx[2] = { + cpu_to_be16(reg), + cpu_to_be16(data) }; - ret = i2c_master_send(client, tx, 4); + ret = i2c_master_send(client, (u8 *)tx, 4); if (ret < 0) dev_err(&client->dev, "I2C write error\n"); @@ -54,25 +49,16 @@ static int ad714x_i2c_read(struct device *dev, unsigned short reg, { struct i2c_client *client = to_i2c_client(dev); int ret = 0; - u8 *_reg = (u8 *)® - u8 *_data = (u8 *)data; + unsigned short tx = cpu_to_be16(reg); - u8 tx[2] = { - _reg[1], - _reg[0] - }; - u8 rx[2]; - - ret = i2c_master_send(client, tx, 2); + ret = i2c_master_send(client, (u8 *)&tx, 2); if (ret >= 0) - ret = i2c_master_recv(client, rx, 2); + ret = i2c_master_recv(client, (u8 *)data, 2); - if (unlikely(ret < 0)) { + if (unlikely(ret < 0)) dev_err(&client->dev, "I2C read error\n"); - } else { - _data[0] = rx[1]; - _data[1] = rx[0]; - } + else + *data = be16_to_cpu(*data); return ret; } diff --git a/drivers/input/misc/ad714x-spi.c b/drivers/input/misc/ad714x-spi.c index da83ac9..0c7f948 100644 --- a/drivers/input/misc/ad714x-spi.c +++ b/drivers/input/misc/ad714x-spi.c @@ -6,7 +6,7 @@ * Licensed under the GPL-2 or later. */ -#include /* BUS_I2C */ +#include /* BUS_SPI */ #include #include #include @@ -30,22 +30,28 @@ static int ad714x_spi_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(ad714x_spi_pm, ad714x_spi_suspend, ad714x_spi_resume); -static int ad714x_spi_read(struct device *dev, unsigned short reg, - unsigned short *data) +static int ad714x_spi_read(struct device *dev, + unsigned short reg, unsigned short *data) { struct spi_device *spi = to_spi_device(dev); - unsigned short tx = AD714x_SPI_CMD_PREFIX | AD714x_SPI_READ | reg; + unsigned short tx = cpu_to_be16(AD714x_SPI_CMD_PREFIX | + AD714x_SPI_READ | reg); + int ret; - return spi_write_then_read(spi, (u8 *)&tx, 2, (u8 *)data, 2); + ret = spi_write_then_read(spi, &tx, 2, data, 2); + + *data = be16_to_cpup(data); + + return ret; } -static int ad714x_spi_write(struct device *dev, unsigned short reg, - unsigned short data) +static int ad714x_spi_write(struct device *dev, + unsigned short reg, unsigned short data) { struct spi_device *spi = to_spi_device(dev); unsigned short tx[2] = { - AD714x_SPI_CMD_PREFIX | reg, - data + cpu_to_be16(AD714x_SPI_CMD_PREFIX | reg), + cpu_to_be16(data) }; return spi_write(spi, (u8 *)tx, 4); -- cgit v1.1 From c0409feb86893f5ccf73964c7b2b47ca64bdb014 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 22 Aug 2011 09:45:39 -0700 Subject: Input: ad714x - use DMA-safe buffers for spi_write() spi_write() requires use of DMA-safe (cacheline aligned) buffers. Also use the same buffers when reading data since to avoid extra locking and potential memory allocation in spi_write_then_read(). Acked-by: Michael Hennerich Signed-off-by: Dmitry Torokhov --- drivers/input/misc/ad714x-i2c.c | 60 ++++++++++++++------------- drivers/input/misc/ad714x-spi.c | 51 ++++++++++++++++------- drivers/input/misc/ad714x.c | 90 ++++++++++++++--------------------------- drivers/input/misc/ad714x.h | 33 ++++++++++++++- 4 files changed, 131 insertions(+), 103 deletions(-) diff --git a/drivers/input/misc/ad714x-i2c.c b/drivers/input/misc/ad714x-i2c.c index 00a6a22..6c61218 100644 --- a/drivers/input/misc/ad714x-i2c.c +++ b/drivers/input/misc/ad714x-i2c.c @@ -27,40 +27,46 @@ static int ad714x_i2c_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(ad714x_i2c_pm, ad714x_i2c_suspend, ad714x_i2c_resume); -static int ad714x_i2c_write(struct device *dev, unsigned short reg, - unsigned short data) +static int ad714x_i2c_write(struct ad714x_chip *chip, + unsigned short reg, unsigned short data) { - struct i2c_client *client = to_i2c_client(dev); - int ret = 0; - unsigned short tx[2] = { - cpu_to_be16(reg), - cpu_to_be16(data) - }; - - ret = i2c_master_send(client, (u8 *)tx, 4); - if (ret < 0) - dev_err(&client->dev, "I2C write error\n"); - - return ret; + struct i2c_client *client = to_i2c_client(chip->dev); + int error; + + chip->xfer_buf[0] = cpu_to_be16(reg); + chip->xfer_buf[1] = cpu_to_be16(data); + + error = i2c_master_send(client, (u8 *)chip->xfer_buf, + 2 * sizeof(*chip->xfer_buf)); + if (unlikely(error < 0)) { + dev_err(&client->dev, "I2C write error: %d\n", error); + return error; + } + + return 0; } -static int ad714x_i2c_read(struct device *dev, unsigned short reg, - unsigned short *data) +static int ad714x_i2c_read(struct ad714x_chip *chip, + unsigned short reg, unsigned short *data) { - struct i2c_client *client = to_i2c_client(dev); - int ret = 0; - unsigned short tx = cpu_to_be16(reg); + struct i2c_client *client = to_i2c_client(chip->dev); + int error; + + chip->xfer_buf[0] = cpu_to_be16(reg); - ret = i2c_master_send(client, (u8 *)&tx, 2); - if (ret >= 0) - ret = i2c_master_recv(client, (u8 *)data, 2); + error = i2c_master_send(client, (u8 *)chip->xfer_buf, + sizeof(*chip->xfer_buf)); + if (error >= 0) + error = i2c_master_recv(client, (u8 *)chip->xfer_buf, + sizeof(*chip->xfer_buf)); - if (unlikely(ret < 0)) - dev_err(&client->dev, "I2C read error\n"); - else - *data = be16_to_cpu(*data); + if (unlikely(error < 0)) { + dev_err(&client->dev, "I2C read error: %d\n", error); + return error; + } - return ret; + *data = be16_to_cpup(chip->xfer_buf); + return 0; } static int __devinit ad714x_i2c_probe(struct i2c_client *client, diff --git a/drivers/input/misc/ad714x-spi.c b/drivers/input/misc/ad714x-spi.c index 0c7f948..306577d 100644 --- a/drivers/input/misc/ad714x-spi.c +++ b/drivers/input/misc/ad714x-spi.c @@ -30,31 +30,54 @@ static int ad714x_spi_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(ad714x_spi_pm, ad714x_spi_suspend, ad714x_spi_resume); -static int ad714x_spi_read(struct device *dev, +static int ad714x_spi_read(struct ad714x_chip *chip, unsigned short reg, unsigned short *data) { - struct spi_device *spi = to_spi_device(dev); - unsigned short tx = cpu_to_be16(AD714x_SPI_CMD_PREFIX | + struct spi_device *spi = to_spi_device(chip->dev); + struct spi_message message; + struct spi_transfer xfer[2]; + int error; + + spi_message_init(&message); + memset(xfer, 0, sizeof(xfer)); + + chip->xfer_buf[0] = cpu_to_be16(AD714x_SPI_CMD_PREFIX | AD714x_SPI_READ | reg); - int ret; + xfer[0].tx_buf = &chip->xfer_buf[0]; + xfer[0].len = sizeof(chip->xfer_buf[0]); + spi_message_add_tail(&xfer[0], &message); - ret = spi_write_then_read(spi, &tx, 2, data, 2); + xfer[1].rx_buf = &chip->xfer_buf[1]; + xfer[1].len = sizeof(chip->xfer_buf[1]); + spi_message_add_tail(&xfer[1], &message); - *data = be16_to_cpup(data); + error = spi_sync(spi, &message); + if (unlikely(error)) { + dev_err(chip->dev, "SPI read error: %d\n", error); + return error; + } - return ret; + *data = be16_to_cpu(chip->xfer_buf[1]); + return 0; } -static int ad714x_spi_write(struct device *dev, +static int ad714x_spi_write(struct ad714x_chip *chip, unsigned short reg, unsigned short data) { - struct spi_device *spi = to_spi_device(dev); - unsigned short tx[2] = { - cpu_to_be16(AD714x_SPI_CMD_PREFIX | reg), - cpu_to_be16(data) - }; + struct spi_device *spi = to_spi_device(chip->dev); + int error; - return spi_write(spi, (u8 *)tx, 4); + chip->xfer_buf[0] = cpu_to_be16(AD714x_SPI_CMD_PREFIX | reg); + chip->xfer_buf[1] = cpu_to_be16(data); + + error = spi_write(spi, (u8 *)chip->xfer_buf, + 2 * sizeof(*chip->xfer_buf)); + if (unlikely(error)) { + dev_err(chip->dev, "SPI write error: %d\n", error); + return error; + } + + return 0; } static int __devinit ad714x_spi_probe(struct spi_device *spi) diff --git a/drivers/input/misc/ad714x.c b/drivers/input/misc/ad714x.c index c3a62c4..2be0366c 100644 --- a/drivers/input/misc/ad714x.c +++ b/drivers/input/misc/ad714x.c @@ -59,7 +59,6 @@ #define STAGE11_AMBIENT 0x27D #define PER_STAGE_REG_NUM 36 -#define STAGE_NUM 12 #define STAGE_CFGREG_NUM 8 #define SYS_CFGREG_NUM 8 @@ -124,28 +123,6 @@ struct ad714x_driver_data { * information to integrate all things which will be private data * of spi/i2c device */ -struct ad714x_chip { - unsigned short h_state; - unsigned short l_state; - unsigned short c_state; - unsigned short adc_reg[STAGE_NUM]; - unsigned short amb_reg[STAGE_NUM]; - unsigned short sensor_val[STAGE_NUM]; - - struct ad714x_platform_data *hw; - struct ad714x_driver_data *sw; - - int irq; - struct device *dev; - ad714x_read_t read; - ad714x_write_t write; - - struct mutex mutex; - - unsigned product; - unsigned version; -}; - static void ad714x_use_com_int(struct ad714x_chip *ad714x, int start_stage, int end_stage) { @@ -154,13 +131,13 @@ static void ad714x_use_com_int(struct ad714x_chip *ad714x, mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1); - ad714x->read(ad714x->dev, STG_COM_INT_EN_REG, &data); + ad714x->read(ad714x, STG_COM_INT_EN_REG, &data); data |= 1 << end_stage; - ad714x->write(ad714x->dev, STG_COM_INT_EN_REG, data); + ad714x->write(ad714x, STG_COM_INT_EN_REG, data); - ad714x->read(ad714x->dev, STG_HIGH_INT_EN_REG, &data); + ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data); data &= ~mask; - ad714x->write(ad714x->dev, STG_HIGH_INT_EN_REG, data); + ad714x->write(ad714x, STG_HIGH_INT_EN_REG, data); } static void ad714x_use_thr_int(struct ad714x_chip *ad714x, @@ -171,13 +148,13 @@ static void ad714x_use_thr_int(struct ad714x_chip *ad714x, mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1); - ad714x->read(ad714x->dev, STG_COM_INT_EN_REG, &data); + ad714x->read(ad714x, STG_COM_INT_EN_REG, &data); data &= ~(1 << end_stage); - ad714x->write(ad714x->dev, STG_COM_INT_EN_REG, data); + ad714x->write(ad714x, STG_COM_INT_EN_REG, data); - ad714x->read(ad714x->dev, STG_HIGH_INT_EN_REG, &data); + ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data); data |= mask; - ad714x->write(ad714x->dev, STG_HIGH_INT_EN_REG, data); + ad714x->write(ad714x, STG_HIGH_INT_EN_REG, data); } static int ad714x_cal_highest_stage(struct ad714x_chip *ad714x, @@ -274,10 +251,8 @@ static void ad714x_slider_cal_sensor_val(struct ad714x_chip *ad714x, int idx) int i; for (i = hw->start_stage; i <= hw->end_stage; i++) { - ad714x->read(ad714x->dev, CDC_RESULT_S0 + i, - &ad714x->adc_reg[i]); - ad714x->read(ad714x->dev, - STAGE0_AMBIENT + i * PER_STAGE_REG_NUM, + ad714x->read(ad714x, CDC_RESULT_S0 + i, &ad714x->adc_reg[i]); + ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM, &ad714x->amb_reg[i]); ad714x->sensor_val[i] = abs(ad714x->adc_reg[i] - @@ -445,10 +420,8 @@ static void ad714x_wheel_cal_sensor_val(struct ad714x_chip *ad714x, int idx) int i; for (i = hw->start_stage; i <= hw->end_stage; i++) { - ad714x->read(ad714x->dev, CDC_RESULT_S0 + i, - &ad714x->adc_reg[i]); - ad714x->read(ad714x->dev, - STAGE0_AMBIENT + i * PER_STAGE_REG_NUM, + ad714x->read(ad714x, CDC_RESULT_S0 + i, &ad714x->adc_reg[i]); + ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM, &ad714x->amb_reg[i]); if (ad714x->adc_reg[i] > ad714x->amb_reg[i]) ad714x->sensor_val[i] = ad714x->adc_reg[i] - @@ -598,10 +571,8 @@ static void touchpad_cal_sensor_val(struct ad714x_chip *ad714x, int idx) int i; for (i = hw->x_start_stage; i <= hw->x_end_stage; i++) { - ad714x->read(ad714x->dev, CDC_RESULT_S0 + i, - &ad714x->adc_reg[i]); - ad714x->read(ad714x->dev, - STAGE0_AMBIENT + i * PER_STAGE_REG_NUM, + ad714x->read(ad714x, CDC_RESULT_S0 + i, &ad714x->adc_reg[i]); + ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM, &ad714x->amb_reg[i]); if (ad714x->adc_reg[i] > ad714x->amb_reg[i]) ad714x->sensor_val[i] = ad714x->adc_reg[i] - @@ -891,7 +862,7 @@ static int ad714x_hw_detect(struct ad714x_chip *ad714x) { unsigned short data; - ad714x->read(ad714x->dev, AD714X_PARTID_REG, &data); + ad714x->read(ad714x, AD714X_PARTID_REG, &data); switch (data & 0xFFF0) { case AD7142_PARTID: ad714x->product = 0x7142; @@ -940,23 +911,22 @@ static void ad714x_hw_init(struct ad714x_chip *ad714x) for (i = 0; i < STAGE_NUM; i++) { reg_base = AD714X_STAGECFG_REG + i * STAGE_CFGREG_NUM; for (j = 0; j < STAGE_CFGREG_NUM; j++) - ad714x->write(ad714x->dev, reg_base + j, + ad714x->write(ad714x, reg_base + j, ad714x->hw->stage_cfg_reg[i][j]); } for (i = 0; i < SYS_CFGREG_NUM; i++) - ad714x->write(ad714x->dev, AD714X_SYSCFG_REG + i, + ad714x->write(ad714x, AD714X_SYSCFG_REG + i, ad714x->hw->sys_cfg_reg[i]); for (i = 0; i < SYS_CFGREG_NUM; i++) - ad714x->read(ad714x->dev, AD714X_SYSCFG_REG + i, - &data); + ad714x->read(ad714x, AD714X_SYSCFG_REG + i, &data); - ad714x->write(ad714x->dev, AD714X_STG_CAL_EN_REG, 0xFFF); + ad714x->write(ad714x, AD714X_STG_CAL_EN_REG, 0xFFF); /* clear all interrupts */ - ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &data); - ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &data); - ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &data); + ad714x->read(ad714x, STG_LOW_INT_STA_REG, &data); + ad714x->read(ad714x, STG_HIGH_INT_STA_REG, &data); + ad714x->read(ad714x, STG_COM_INT_STA_REG, &data); } static irqreturn_t ad714x_interrupt_thread(int irq, void *data) @@ -966,9 +936,9 @@ static irqreturn_t ad714x_interrupt_thread(int irq, void *data) mutex_lock(&ad714x->mutex); - ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &ad714x->l_state); - ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &ad714x->h_state); - ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &ad714x->c_state); + ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state); + ad714x->read(ad714x, STG_HIGH_INT_STA_REG, &ad714x->h_state); + ad714x->read(ad714x, STG_COM_INT_STA_REG, &ad714x->c_state); for (i = 0; i < ad714x->hw->button_num; i++) ad714x_button_state_machine(ad714x, i); @@ -1245,7 +1215,7 @@ int ad714x_disable(struct ad714x_chip *ad714x) mutex_lock(&ad714x->mutex); data = ad714x->hw->sys_cfg_reg[AD714X_PWR_CTRL] | 0x3; - ad714x->write(ad714x->dev, AD714X_PWR_CTRL, data); + ad714x->write(ad714x, AD714X_PWR_CTRL, data); mutex_unlock(&ad714x->mutex); @@ -1263,16 +1233,16 @@ int ad714x_enable(struct ad714x_chip *ad714x) /* resume to non-shutdown mode */ - ad714x->write(ad714x->dev, AD714X_PWR_CTRL, + ad714x->write(ad714x, AD714X_PWR_CTRL, ad714x->hw->sys_cfg_reg[AD714X_PWR_CTRL]); /* make sure the interrupt output line is not low level after resume, * otherwise we will get no chance to enter falling-edge irq again */ - ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &data); - ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &data); - ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &data); + ad714x->read(ad714x, STG_LOW_INT_STA_REG, &data); + ad714x->read(ad714x, STG_HIGH_INT_STA_REG, &data); + ad714x->read(ad714x, STG_COM_INT_STA_REG, &data); mutex_unlock(&ad714x->mutex); diff --git a/drivers/input/misc/ad714x.h b/drivers/input/misc/ad714x.h index 45c54fb..d12d149 100644 --- a/drivers/input/misc/ad714x.h +++ b/drivers/input/misc/ad714x.h @@ -11,11 +11,40 @@ #include +#define STAGE_NUM 12 + struct device; +struct ad714x_platform_data; +struct ad714x_driver_data; struct ad714x_chip; -typedef int (*ad714x_read_t)(struct device *, unsigned short, unsigned short *); -typedef int (*ad714x_write_t)(struct device *, unsigned short, unsigned short); +typedef int (*ad714x_read_t)(struct ad714x_chip *, unsigned short, unsigned short *); +typedef int (*ad714x_write_t)(struct ad714x_chip *, unsigned short, unsigned short); + +struct ad714x_chip { + unsigned short h_state; + unsigned short l_state; + unsigned short c_state; + unsigned short adc_reg[STAGE_NUM]; + unsigned short amb_reg[STAGE_NUM]; + unsigned short sensor_val[STAGE_NUM]; + + struct ad714x_platform_data *hw; + struct ad714x_driver_data *sw; + + int irq; + struct device *dev; + ad714x_read_t read; + ad714x_write_t write; + + struct mutex mutex; + + unsigned product; + unsigned version; + + __be16 xfer_buf[16] ____cacheline_aligned; + +}; int ad714x_disable(struct ad714x_chip *ad714x); int ad714x_enable(struct ad714x_chip *ad714x); -- cgit v1.1 From 9eff794b777ac9ca034129a1b637204000c8fb29 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Mon, 22 Aug 2011 09:45:42 -0700 Subject: Input: ad714x - read the interrupt status registers in a row The interrupt status registers should be read in row to avoid invalid data. Alter "read" method for both bus options to allow reading several registers in a row and make sure we read interrupt status registers properly. Read sequence saves 50% of bus transactions compared to single register reads. So use it also for the result registers, which are also located in a row. Also update copyright notice. Signed-off-by: Michael Hennerich Signed-off-by: Dmitry Torokhov --- drivers/input/misc/ad714x-i2c.c | 11 +++++--- drivers/input/misc/ad714x-spi.c | 11 +++++--- drivers/input/misc/ad714x.c | 62 +++++++++++++++++++++-------------------- drivers/input/misc/ad714x.h | 6 ++-- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/drivers/input/misc/ad714x-i2c.c b/drivers/input/misc/ad714x-i2c.c index 6c61218..025417d 100644 --- a/drivers/input/misc/ad714x-i2c.c +++ b/drivers/input/misc/ad714x-i2c.c @@ -1,7 +1,7 @@ /* * AD714X CapTouch Programmable Controller driver (I2C bus) * - * Copyright 2009 Analog Devices Inc. + * Copyright 2009-2011 Analog Devices Inc. * * Licensed under the GPL-2 or later. */ @@ -47,9 +47,10 @@ static int ad714x_i2c_write(struct ad714x_chip *chip, } static int ad714x_i2c_read(struct ad714x_chip *chip, - unsigned short reg, unsigned short *data) + unsigned short reg, unsigned short *data, size_t len) { struct i2c_client *client = to_i2c_client(chip->dev); + int i; int error; chip->xfer_buf[0] = cpu_to_be16(reg); @@ -58,14 +59,16 @@ static int ad714x_i2c_read(struct ad714x_chip *chip, sizeof(*chip->xfer_buf)); if (error >= 0) error = i2c_master_recv(client, (u8 *)chip->xfer_buf, - sizeof(*chip->xfer_buf)); + len * sizeof(*chip->xfer_buf)); if (unlikely(error < 0)) { dev_err(&client->dev, "I2C read error: %d\n", error); return error; } - *data = be16_to_cpup(chip->xfer_buf); + for (i = 0; i < len; i++) + data[i] = be16_to_cpu(chip->xfer_buf[i]); + return 0; } diff --git a/drivers/input/misc/ad714x-spi.c b/drivers/input/misc/ad714x-spi.c index 306577d..875b508 100644 --- a/drivers/input/misc/ad714x-spi.c +++ b/drivers/input/misc/ad714x-spi.c @@ -1,7 +1,7 @@ /* * AD714X CapTouch Programmable Controller driver (SPI bus) * - * Copyright 2009 Analog Devices Inc. + * Copyright 2009-2011 Analog Devices Inc. * * Licensed under the GPL-2 or later. */ @@ -31,11 +31,12 @@ static int ad714x_spi_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(ad714x_spi_pm, ad714x_spi_suspend, ad714x_spi_resume); static int ad714x_spi_read(struct ad714x_chip *chip, - unsigned short reg, unsigned short *data) + unsigned short reg, unsigned short *data, size_t len) { struct spi_device *spi = to_spi_device(chip->dev); struct spi_message message; struct spi_transfer xfer[2]; + int i; int error; spi_message_init(&message); @@ -48,7 +49,7 @@ static int ad714x_spi_read(struct ad714x_chip *chip, spi_message_add_tail(&xfer[0], &message); xfer[1].rx_buf = &chip->xfer_buf[1]; - xfer[1].len = sizeof(chip->xfer_buf[1]); + xfer[1].len = sizeof(chip->xfer_buf[1]) * len; spi_message_add_tail(&xfer[1], &message); error = spi_sync(spi, &message); @@ -57,7 +58,9 @@ static int ad714x_spi_read(struct ad714x_chip *chip, return error; } - *data = be16_to_cpu(chip->xfer_buf[1]); + for (i = 0; i < len; i++) + data[i] = be16_to_cpu(chip->xfer_buf[i + 1]); + return 0; } diff --git a/drivers/input/misc/ad714x.c b/drivers/input/misc/ad714x.c index 2be0366c..ca42c7d 100644 --- a/drivers/input/misc/ad714x.c +++ b/drivers/input/misc/ad714x.c @@ -1,7 +1,7 @@ /* * AD714X CapTouch Programmable Controller driver supporting AD7142/3/7/8/7A * - * Copyright 2009 Analog Devices Inc. + * Copyright 2009-2011 Analog Devices Inc. * * Licensed under the GPL-2 or later. */ @@ -123,6 +123,7 @@ struct ad714x_driver_data { * information to integrate all things which will be private data * of spi/i2c device */ + static void ad714x_use_com_int(struct ad714x_chip *ad714x, int start_stage, int end_stage) { @@ -131,11 +132,11 @@ static void ad714x_use_com_int(struct ad714x_chip *ad714x, mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1); - ad714x->read(ad714x, STG_COM_INT_EN_REG, &data); + ad714x->read(ad714x, STG_COM_INT_EN_REG, &data, 1); data |= 1 << end_stage; ad714x->write(ad714x, STG_COM_INT_EN_REG, data); - ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data); + ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data, 1); data &= ~mask; ad714x->write(ad714x, STG_HIGH_INT_EN_REG, data); } @@ -148,11 +149,11 @@ static void ad714x_use_thr_int(struct ad714x_chip *ad714x, mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1); - ad714x->read(ad714x, STG_COM_INT_EN_REG, &data); + ad714x->read(ad714x, STG_COM_INT_EN_REG, &data, 1); data &= ~(1 << end_stage); ad714x->write(ad714x, STG_COM_INT_EN_REG, data); - ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data); + ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data, 1); data |= mask; ad714x->write(ad714x, STG_HIGH_INT_EN_REG, data); } @@ -250,13 +251,16 @@ static void ad714x_slider_cal_sensor_val(struct ad714x_chip *ad714x, int idx) struct ad714x_slider_plat *hw = &ad714x->hw->slider[idx]; int i; + ad714x->read(ad714x, CDC_RESULT_S0 + hw->start_stage, + &ad714x->adc_reg[hw->start_stage], + hw->end_stage - hw->start_stage + 1); + for (i = hw->start_stage; i <= hw->end_stage; i++) { - ad714x->read(ad714x, CDC_RESULT_S0 + i, &ad714x->adc_reg[i]); ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM, - &ad714x->amb_reg[i]); + &ad714x->amb_reg[i], 1); - ad714x->sensor_val[i] = abs(ad714x->adc_reg[i] - - ad714x->amb_reg[i]); + ad714x->sensor_val[i] = + abs(ad714x->adc_reg[i] - ad714x->amb_reg[i]); } } @@ -419,13 +423,16 @@ static void ad714x_wheel_cal_sensor_val(struct ad714x_chip *ad714x, int idx) struct ad714x_wheel_plat *hw = &ad714x->hw->wheel[idx]; int i; + ad714x->read(ad714x, CDC_RESULT_S0 + hw->start_stage, + &ad714x->adc_reg[hw->start_stage], + hw->end_stage - hw->start_stage + 1); + for (i = hw->start_stage; i <= hw->end_stage; i++) { - ad714x->read(ad714x, CDC_RESULT_S0 + i, &ad714x->adc_reg[i]); ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM, - &ad714x->amb_reg[i]); + &ad714x->amb_reg[i], 1); if (ad714x->adc_reg[i] > ad714x->amb_reg[i]) - ad714x->sensor_val[i] = ad714x->adc_reg[i] - - ad714x->amb_reg[i]; + ad714x->sensor_val[i] = + ad714x->adc_reg[i] - ad714x->amb_reg[i]; else ad714x->sensor_val[i] = 0; } @@ -570,13 +577,16 @@ static void touchpad_cal_sensor_val(struct ad714x_chip *ad714x, int idx) struct ad714x_touchpad_plat *hw = &ad714x->hw->touchpad[idx]; int i; + ad714x->read(ad714x, CDC_RESULT_S0 + hw->x_start_stage, + &ad714x->adc_reg[hw->x_start_stage], + hw->x_end_stage - hw->x_start_stage + 1); + for (i = hw->x_start_stage; i <= hw->x_end_stage; i++) { - ad714x->read(ad714x, CDC_RESULT_S0 + i, &ad714x->adc_reg[i]); ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM, - &ad714x->amb_reg[i]); + &ad714x->amb_reg[i], 1); if (ad714x->adc_reg[i] > ad714x->amb_reg[i]) - ad714x->sensor_val[i] = ad714x->adc_reg[i] - - ad714x->amb_reg[i]; + ad714x->sensor_val[i] = + ad714x->adc_reg[i] - ad714x->amb_reg[i]; else ad714x->sensor_val[i] = 0; } @@ -862,7 +872,7 @@ static int ad714x_hw_detect(struct ad714x_chip *ad714x) { unsigned short data; - ad714x->read(ad714x, AD714X_PARTID_REG, &data); + ad714x->read(ad714x, AD714X_PARTID_REG, &data, 1); switch (data & 0xFFF0) { case AD7142_PARTID: ad714x->product = 0x7142; @@ -919,14 +929,12 @@ static void ad714x_hw_init(struct ad714x_chip *ad714x) ad714x->write(ad714x, AD714X_SYSCFG_REG + i, ad714x->hw->sys_cfg_reg[i]); for (i = 0; i < SYS_CFGREG_NUM; i++) - ad714x->read(ad714x, AD714X_SYSCFG_REG + i, &data); + ad714x->read(ad714x, AD714X_SYSCFG_REG + i, &data, 1); ad714x->write(ad714x, AD714X_STG_CAL_EN_REG, 0xFFF); /* clear all interrupts */ - ad714x->read(ad714x, STG_LOW_INT_STA_REG, &data); - ad714x->read(ad714x, STG_HIGH_INT_STA_REG, &data); - ad714x->read(ad714x, STG_COM_INT_STA_REG, &data); + ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state, 3); } static irqreturn_t ad714x_interrupt_thread(int irq, void *data) @@ -936,9 +944,7 @@ static irqreturn_t ad714x_interrupt_thread(int irq, void *data) mutex_lock(&ad714x->mutex); - ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state); - ad714x->read(ad714x, STG_HIGH_INT_STA_REG, &ad714x->h_state); - ad714x->read(ad714x, STG_COM_INT_STA_REG, &ad714x->c_state); + ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state, 3); for (i = 0; i < ad714x->hw->button_num; i++) ad714x_button_state_machine(ad714x, i); @@ -1225,8 +1231,6 @@ EXPORT_SYMBOL(ad714x_disable); int ad714x_enable(struct ad714x_chip *ad714x) { - unsigned short data; - dev_dbg(ad714x->dev, "%s enter\n", __func__); mutex_lock(&ad714x->mutex); @@ -1240,9 +1244,7 @@ int ad714x_enable(struct ad714x_chip *ad714x) * otherwise we will get no chance to enter falling-edge irq again */ - ad714x->read(ad714x, STG_LOW_INT_STA_REG, &data); - ad714x->read(ad714x, STG_HIGH_INT_STA_REG, &data); - ad714x->read(ad714x, STG_COM_INT_STA_REG, &data); + ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state, 3); mutex_unlock(&ad714x->mutex); diff --git a/drivers/input/misc/ad714x.h b/drivers/input/misc/ad714x.h index d12d149..3c85455 100644 --- a/drivers/input/misc/ad714x.h +++ b/drivers/input/misc/ad714x.h @@ -1,7 +1,7 @@ /* * AD714X CapTouch Programmable Controller driver (bus interfaces) * - * Copyright 2009 Analog Devices Inc. + * Copyright 2009-2011 Analog Devices Inc. * * Licensed under the GPL-2 or later. */ @@ -18,12 +18,12 @@ struct ad714x_platform_data; struct ad714x_driver_data; struct ad714x_chip; -typedef int (*ad714x_read_t)(struct ad714x_chip *, unsigned short, unsigned short *); +typedef int (*ad714x_read_t)(struct ad714x_chip *, unsigned short, unsigned short *, size_t); typedef int (*ad714x_write_t)(struct ad714x_chip *, unsigned short, unsigned short); struct ad714x_chip { - unsigned short h_state; unsigned short l_state; + unsigned short h_state; unsigned short c_state; unsigned short adc_reg[STAGE_NUM]; unsigned short amb_reg[STAGE_NUM]; -- cgit v1.1 From ecb4433550f0620f3d1471ae7099037ede30a91e Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 12 Aug 2011 14:00:59 +0200 Subject: mac80211: fix suspend/resume races with unregister hw Do not call ->suspend, ->resume methods after we unregister wiphy. Also delete sta_clanup timer after we finish wiphy unregister to avoid this: WARNING: at lib/debugobjects.c:262 debug_print_object+0x85/0xa0() Hardware name: 6369CTO ODEBUG: free active (active state 0) object type: timer_list hint: sta_info_cleanup+0x0/0x180 [mac80211] Modules linked in: aes_i586 aes_generic fuse bridge stp llc autofs4 sunrpc cpufreq_ondemand acpi_cpufreq mperf ext2 dm_mod uinput thinkpad_acpi hwmon sg arc4 rt2800usb rt2800lib crc_ccitt rt2x00usb rt2x00lib mac80211 cfg80211 i2c_i801 iTCO_wdt iTCO_vendor_support e1000e ext4 mbcache jbd2 sd_mod crc_t10dif sr_mod cdrom yenta_socket ahci libahci pata_acpi ata_generic ata_piix i915 drm_kms_helper drm i2c_algo_bit video [last unloaded: microcode] Pid: 5663, comm: pm-hibernate Not tainted 3.1.0-rc1-wl+ #19 Call Trace: [] warn_slowpath_common+0x6d/0xa0 [] ? debug_print_object+0x85/0xa0 [] ? debug_print_object+0x85/0xa0 [] warn_slowpath_fmt+0x2e/0x30 [] debug_print_object+0x85/0xa0 [] ? sta_info_alloc+0x1a0/0x1a0 [mac80211] [] debug_check_no_obj_freed+0xe2/0x180 [] kfree+0x8b/0x150 [] cfg80211_dev_free+0x7e/0x90 [cfg80211] [] wiphy_dev_release+0xd/0x10 [cfg80211] [] device_release+0x19/0x80 [] kobject_release+0x7a/0x1c0 [] ? rtnl_unlock+0x8/0x10 [] ? wiphy_resume+0x6b/0x80 [cfg80211] [] ? kobject_del+0x30/0x30 [] kref_put+0x2d/0x60 [] kobject_put+0x1d/0x50 [] ? mutex_lock+0x14/0x40 [] put_device+0xf/0x20 [] dpm_resume+0xca/0x160 [] hibernation_snapshot+0xcd/0x260 [] ? freeze_processes+0x3f/0x90 [] hibernate+0xcb/0x1e0 [] ? pm_async_store+0x40/0x40 [] state_store+0xa0/0xb0 [] ? pm_async_store+0x40/0x40 [] kobj_attr_store+0x20/0x30 [] sysfs_write_file+0x94/0xf0 [] vfs_write+0x9a/0x160 [] ? sysfs_open_file+0x200/0x200 [] sys_write+0x3d/0x70 [] sysenter_do_call+0x12/0x28 Cc: stable@kernel.org Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- include/net/cfg80211.h | 3 +++ net/mac80211/main.c | 2 +- net/wireless/core.c | 7 +++++++ net/wireless/sysfs.c | 6 ++++-- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d17f47f..408ae48 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1865,6 +1865,9 @@ struct wiphy { * you need use set_wiphy_dev() (see below) */ struct device dev; + /* protects ->resume, ->suspend sysfs callbacks against unregister hw */ + bool registered; + /* dir in debugfs: ieee80211/ */ struct dentry *debugfsdir; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 866f269..acb4423 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1012,7 +1012,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) cancel_work_sync(&local->reconfig_filter); ieee80211_clear_tx_pending(local); - sta_info_stop(local); rate_control_deinitialize(local); if (skb_queue_len(&local->skb_queue) || @@ -1024,6 +1023,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) destroy_workqueue(local->workqueue); wiphy_unregister(local->hw.wiphy); + sta_info_stop(local); ieee80211_wep_free(local); ieee80211_led_exit(local); kfree(local->int_scan_req); diff --git a/net/wireless/core.c b/net/wireless/core.c index 645437c..c148651 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -616,6 +616,9 @@ int wiphy_register(struct wiphy *wiphy) if (res) goto out_rm_dev; + rtnl_lock(); + rdev->wiphy.registered = true; + rtnl_unlock(); return 0; out_rm_dev: @@ -647,6 +650,10 @@ void wiphy_unregister(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + rtnl_lock(); + rdev->wiphy.registered = false; + rtnl_unlock(); + rfkill_unregister(rdev->rfkill); /* protect the device list */ diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index c6e4ca6..ff57459 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -93,7 +93,8 @@ static int wiphy_suspend(struct device *dev, pm_message_t state) if (rdev->ops->suspend) { rtnl_lock(); - ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan); + if (rdev->wiphy.registered) + ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan); rtnl_unlock(); } @@ -112,7 +113,8 @@ static int wiphy_resume(struct device *dev) if (rdev->ops->resume) { rtnl_lock(); - ret = rdev->ops->resume(&rdev->wiphy); + if (rdev->wiphy.registered) + ret = rdev->ops->resume(&rdev->wiphy); rtnl_unlock(); } -- cgit v1.1 From 543cc38c8fe86deba4169977c61eb88491036837 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 12 Aug 2011 14:02:04 +0200 Subject: rt2x00: do not drop usb dev reference counter on suspend When hibernating ->resume may not be called by usb core, but disconnect and probe instead, so we do not increase the counter after decreasing it in ->supend. As a result we free memory early, and get crash when unplugging usb dongle. BUG: unable to handle kernel paging request at 6b6b6b9f IP: [] driver_sysfs_remove+0x10/0x30 *pdpt = 0000000034f21001 *pde = 0000000000000000 Pid: 20, comm: khubd Not tainted 3.1.0-rc1-wl+ #20 LENOVO 6369CTO/6369CTO EIP: 0060:[] EFLAGS: 00010202 CPU: 1 EIP is at driver_sysfs_remove+0x10/0x30 EAX: 6b6b6b6b EBX: f52bba34 ECX: 00000000 EDX: 6b6b6b6b ESI: 6b6b6b6b EDI: c0a0ea20 EBP: f61c9e68 ESP: f61c9e64 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process khubd (pid: 20, ti=f61c8000 task=f6138270 task.ti=f61c8000) Call Trace: [] __device_release_driver+0x1f/0xa0 [] device_release_driver+0x20/0x40 [] bus_remove_device+0x84/0xe0 [] ? device_remove_attrs+0x2a/0x80 [] device_del+0xe7/0x170 [] usb_disconnect+0xd4/0x180 [] hub_thread+0x691/0x1600 [] ? wake_up_bit+0x30/0x30 [] ? complete+0x49/0x60 [] ? hub_disconnect+0xd0/0xd0 [] ? hub_disconnect+0xd0/0xd0 [] kthread+0x74/0x80 [] ? kthread_worker_fn+0x150/0x150 [] kernel_thread_helper+0x6/0x10 Cc: stable@kernel.org Signed-off-by: Stanislaw Gruszka Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2x00usb.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c index 7fbb55c..1e31050 100644 --- a/drivers/net/wireless/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/rt2x00/rt2x00usb.c @@ -871,18 +871,8 @@ int rt2x00usb_suspend(struct usb_interface *usb_intf, pm_message_t state) { struct ieee80211_hw *hw = usb_get_intfdata(usb_intf); struct rt2x00_dev *rt2x00dev = hw->priv; - int retval; - - retval = rt2x00lib_suspend(rt2x00dev, state); - if (retval) - return retval; - /* - * Decrease usbdev refcount. - */ - usb_put_dev(interface_to_usbdev(usb_intf)); - - return 0; + return rt2x00lib_suspend(rt2x00dev, state); } EXPORT_SYMBOL_GPL(rt2x00usb_suspend); @@ -891,8 +881,6 @@ int rt2x00usb_resume(struct usb_interface *usb_intf) struct ieee80211_hw *hw = usb_get_intfdata(usb_intf); struct rt2x00_dev *rt2x00dev = hw->priv; - usb_get_dev(interface_to_usbdev(usb_intf)); - return rt2x00lib_resume(rt2x00dev); } EXPORT_SYMBOL_GPL(rt2x00usb_resume); -- cgit v1.1 From b503c7a273c0a3018ad11ea8c513c639120afbf4 Mon Sep 17 00:00:00 2001 From: Senthil Balasubramanian Date: Fri, 19 Aug 2011 18:43:06 +0530 Subject: ath9k_hw: Fix STA (AR9485) bringup issue due to incorrect MAC address Due to some recent optimization done in the way the mac address bytes are written into the OTP memory, some AR9485 chipsets were forced to use the first byte from the eeprom template and the remaining bytes are read from OTP. AR9485 happens to use generic eeprom template which has 0x1 as the first byte causes issues in bringing up the card. So fixed the eeprom template accordingly to address the issue. Cc: stable@kernel.org Cc: Paul Stewart Signed-off-by: Senthil Balasubramanian Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/ar9003_eeprom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index c34bef1..1b94003 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -69,7 +69,7 @@ static int ar9003_hw_power_interpolate(int32_t x, static const struct ar9300_eeprom ar9300_default = { .eepromVersion = 2, .templateVersion = 2, - .macAddr = {1, 2, 3, 4, 5, 6}, + .macAddr = {0, 2, 3, 4, 5, 6}, .custData = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, .baseEepHeader = { -- cgit v1.1 From 886b66ef2f2d4984f6c72d86a9d8a3ffe4344fa5 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 19 Aug 2011 22:14:47 +0200 Subject: bcma: add uevent to the bus, to autoload drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: David Woodhouse Acked-by: Rafał Miłecki Signed-off-by: John W. Linville --- drivers/bcma/main.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 873e2e4..73b7b1a 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -15,6 +15,7 @@ MODULE_LICENSE("GPL"); static int bcma_bus_match(struct device *dev, struct device_driver *drv); static int bcma_device_probe(struct device *dev); static int bcma_device_remove(struct device *dev); +static int bcma_device_uevent(struct device *dev, struct kobj_uevent_env *env); static ssize_t manuf_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -49,6 +50,7 @@ static struct bus_type bcma_bus_type = { .match = bcma_bus_match, .probe = bcma_device_probe, .remove = bcma_device_remove, + .uevent = bcma_device_uevent, .dev_attrs = bcma_device_attrs, }; @@ -227,6 +229,16 @@ static int bcma_device_remove(struct device *dev) return 0; } +static int bcma_device_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct bcma_device *core = container_of(dev, struct bcma_device, dev); + + return add_uevent_var(env, + "MODALIAS=bcma:m%04Xid%04Xrev%02Xcl%02X", + core->id.manuf, core->id.id, + core->id.rev, core->id.class); +} + static int __init bcma_modinit(void) { int err; -- cgit v1.1 From d5c073caf050bc713271a02e016b1672d9b7b935 Mon Sep 17 00:00:00 2001 From: Geoffrey Thomas Date: Mon, 22 Aug 2011 11:28:57 -0700 Subject: net: Documentation: RFC 2553bis is now RFC 3493 Signed-off-by: Geoffrey Thomas Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index db2a406..8154699 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -992,7 +992,7 @@ bindv6only - BOOLEAN TRUE: disable IPv4-mapped address feature FALSE: enable IPv4-mapped address feature - Default: FALSE (as specified in RFC2553bis) + Default: FALSE (as specified in RFC3493) IPv6 Fragmentation: -- cgit v1.1 From fcb8ce5cfe30ca9ca5c9a79cdfe26d1993e65e0c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 22 Aug 2011 11:42:53 -0700 Subject: Linux 3.1-rc3 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 3241d41..788511f 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ VERSION = 3 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc2 -NAME = Wet Seal +EXTRAVERSION = -rc3 +NAME = "Divemaster Edition" # *DOCUMENTATION* # To see a list of typical targets execute "make help" -- cgit v1.1 From 052605c6caa3e1edf8eee8fe5fe6d53f5721f39a Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 26 Jul 2011 17:48:43 -0700 Subject: target: Make standard INQUIRY return 'not connected' for tpg_virt_lun0 This patch changes target_emulate_inquiry_std() to set the 'not connected' (0x35) bit in standard INQUIRY response data when we are processing a request to a virtual LUN=0 mapping from struct se_device *g_lun0_dev that have been setup for us in transport_lookup_cmd_lun(). This addresses an issue where qla2xxx FC clients need to be able to create demo-mode I_T FC Nexuses by default, but should not be exposing the default set of TPG LUNs to all FC clients. This includes adding an new optional target_core_fabric_ops->tpg_check_demo_mode_login_only() caller to allow demo_mode nexuses to skip the old default of bulding a demo-mode MappedLUNs list via core_tpg_add_node_to_devs(). (roland: Add missing tpg_check_demo_mode_login_only check in core_dev_add_lun) Reported-by: Roland Dreier Cc: Andrew Vasquez Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_cdb.c | 11 ++++++++--- drivers/target/target_core_device.c | 4 +++- drivers/target/target_core_tpg.c | 12 ++++++++++-- include/target/target_core_fabric_ops.h | 6 ++++++ 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c index 8ae09a1..d095408 100644 --- a/drivers/target/target_core_cdb.c +++ b/drivers/target/target_core_cdb.c @@ -67,6 +67,7 @@ target_emulate_inquiry_std(struct se_cmd *cmd) { struct se_lun *lun = cmd->se_lun; struct se_device *dev = cmd->se_dev; + struct se_portal_group *tpg = lun->lun_sep->sep_tpg; unsigned char *buf; /* @@ -81,9 +82,13 @@ target_emulate_inquiry_std(struct se_cmd *cmd) buf = transport_kmap_first_data_page(cmd); - buf[0] = dev->transport->get_device_type(dev); - if (buf[0] == TYPE_TAPE) - buf[1] = 0x80; + if (dev == tpg->tpg_virt_lun0.lun_se_dev) { + buf[0] = 0x3f; /* Not connected */ + } else { + buf[0] = dev->transport->get_device_type(dev); + if (buf[0] == TYPE_TAPE) + buf[1] = 0x80; + } buf[2] = dev->transport->get_device_rev(dev); /* diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index b38b6c9..ec3fbcd 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -1346,7 +1346,9 @@ struct se_lun *core_dev_add_lun( struct se_node_acl *acl; spin_lock_bh(&tpg->acl_node_lock); list_for_each_entry(acl, &tpg->acl_node_list, acl_list) { - if (acl->dynamic_node_acl) { + if (acl->dynamic_node_acl && + (!tpg->se_tpg_tfo->tpg_check_demo_mode_login_only || + !tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg))) { spin_unlock_bh(&tpg->acl_node_lock); core_tpg_add_node_to_devs(acl, tpg); spin_lock_bh(&tpg->acl_node_lock); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 4f1ba4c..718ccd1 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -298,8 +298,16 @@ struct se_node_acl *core_tpg_check_initiator_node_acl( tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, acl); return NULL; } - - core_tpg_add_node_to_devs(acl, tpg); + /* + * Here we only create demo-mode MappedLUNs from the active + * TPG LUNs if the fabric is not explictly asking for + * tpg_check_demo_mode_login_only() == 1. + */ + if ((tpg->se_tpg_tfo->tpg_check_demo_mode_login_only != NULL) && + (tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg) == 1)) + do { ; } while (0); + else + core_tpg_add_node_to_devs(acl, tpg); spin_lock_bh(&tpg->acl_node_lock); list_add_tail(&acl->acl_list, &tpg->acl_node_list); diff --git a/include/target/target_core_fabric_ops.h b/include/target/target_core_fabric_ops.h index 2de8fe9..126c675 100644 --- a/include/target/target_core_fabric_ops.h +++ b/include/target/target_core_fabric_ops.h @@ -27,6 +27,12 @@ struct target_core_fabric_ops { int (*tpg_check_demo_mode_cache)(struct se_portal_group *); int (*tpg_check_demo_mode_write_protect)(struct se_portal_group *); int (*tpg_check_prod_mode_write_protect)(struct se_portal_group *); + /* + * Optionally used by fabrics to allow demo-mode login, but not + * expose any TPG LUNs, and return 'not connected' in standard + * inquiry response + */ + int (*tpg_check_demo_mode_login_only)(struct se_portal_group *); struct se_node_acl *(*tpg_alloc_fabric_acl)( struct se_portal_group *); void (*tpg_release_fabric_acl)(struct se_portal_group *, -- cgit v1.1 From e1750ba20f0d850c38820190ccbf0f647723091a Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Mon, 1 Aug 2011 23:58:18 +0200 Subject: target: Use ERR_CAST inlined function Use ERR_CAST inlined function instead of ERR_PTR(PTR_ERR(...)) The semantic patch that makes this output is available in scripts/coccinelle/api/err_cast.cocci. More information about semantic patching is available at http://coccinelle.lip6.fr/ Signed-off-by: Thomas Meyer Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_configfs.c | 4 ++-- drivers/target/target_core_fabric_configfs.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index f095e65..f1643db 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -268,7 +268,7 @@ struct se_tpg_np *lio_target_call_addnptotpg( ISCSI_TCP); if (IS_ERR(tpg_np)) { iscsit_put_tpg(tpg); - return ERR_PTR(PTR_ERR(tpg_np)); + return ERR_CAST(tpg_np); } pr_debug("LIO_Target_ConfigFS: addnptotpg done!\n"); @@ -1285,7 +1285,7 @@ struct se_wwn *lio_target_call_coreaddtiqn( tiqn = iscsit_add_tiqn((unsigned char *)name); if (IS_ERR(tiqn)) - return ERR_PTR(PTR_ERR(tiqn)); + return ERR_CAST(tiqn); /* * Setup struct iscsi_wwn_stat_grps for se_wwn->fabric_stat_group. */ diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index f165469..55bbe08 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -481,7 +481,7 @@ static struct config_group *target_fabric_make_nodeacl( se_nacl = tf->tf_ops.fabric_make_nodeacl(se_tpg, group, name); if (IS_ERR(se_nacl)) - return ERR_PTR(PTR_ERR(se_nacl)); + return ERR_CAST(se_nacl); nacl_cg = &se_nacl->acl_group; nacl_cg->default_groups = se_nacl->acl_default_groups; -- cgit v1.1 From 9be08c5804ae4ad96ec22d0b1e71e630803a85ea Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Tue, 2 Aug 2011 10:26:36 +0200 Subject: iscsi-target: Fix leak on failure in iscsi_copy_param_list() We leak memory if the allocations for 'new_param->name' or 'new_param->value' fail in iscsi_target_parameters.c::iscsi_copy_param_list() We also do a lot of variable assignments that are completely pointless if the allocations fail. So, let's move the allocations before the assignments and also make sure that we free whatever was allocated to one if the allocation fail. There's also some small CodingStyle fixups in there (curly braces on both branches of if statement, only one variable per line) since I was in the area anyway. And finally, error messages in the function are put on a single line for easy grep'abillity. Signed-off-by: Jesper Juhl Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_parameters.c | 43 ++++++++++---------------- 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index 252e246..497b2e7 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -545,13 +545,13 @@ int iscsi_copy_param_list( struct iscsi_param_list *src_param_list, int leading) { - struct iscsi_param *new_param = NULL, *param = NULL; + struct iscsi_param *param = NULL; + struct iscsi_param *new_param = NULL; struct iscsi_param_list *param_list = NULL; param_list = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL); if (!param_list) { - pr_err("Unable to allocate memory for" - " struct iscsi_param_list.\n"); + pr_err("Unable to allocate memory for struct iscsi_param_list.\n"); goto err_out; } INIT_LIST_HEAD(¶m_list->param_list); @@ -567,8 +567,17 @@ int iscsi_copy_param_list( new_param = kzalloc(sizeof(struct iscsi_param), GFP_KERNEL); if (!new_param) { - pr_err("Unable to allocate memory for" - " struct iscsi_param.\n"); + pr_err("Unable to allocate memory for struct iscsi_param.\n"); + goto err_out; + } + + new_param->name = kstrdup(param->name, GFP_KERNEL); + new_param->value = kstrdup(param->value, GFP_KERNEL); + if (!new_param->value || !new_param->name) { + kfree(new_param->value); + kfree(new_param->name); + kfree(new_param); + pr_err("Unable to allocate memory for parameter name/value.\n"); goto err_out; } @@ -580,32 +589,12 @@ int iscsi_copy_param_list( new_param->use = param->use; new_param->type_range = param->type_range; - new_param->name = kzalloc(strlen(param->name) + 1, GFP_KERNEL); - if (!new_param->name) { - pr_err("Unable to allocate memory for" - " parameter name.\n"); - goto err_out; - } - - new_param->value = kzalloc(strlen(param->value) + 1, - GFP_KERNEL); - if (!new_param->value) { - pr_err("Unable to allocate memory for" - " parameter value.\n"); - goto err_out; - } - - memcpy(new_param->name, param->name, strlen(param->name)); - new_param->name[strlen(param->name)] = '\0'; - memcpy(new_param->value, param->value, strlen(param->value)); - new_param->value[strlen(param->value)] = '\0'; - list_add_tail(&new_param->p_list, ¶m_list->param_list); } - if (!list_empty(¶m_list->param_list)) + if (!list_empty(¶m_list->param_list)) { *dst_param_list = param_list; - else { + } else { pr_err("No parameters allocated.\n"); goto err_out; } -- cgit v1.1 From 6fc6148865c9a17cee33f251723f6a056f022ecd Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 2 Aug 2011 12:35:02 +0200 Subject: target: Convert target_core_rd.c to use use BUG_ON Use BUG_ON(x) rather than if(x) BUG(); The semantic patch that fixes this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier x; @@ -if (x) BUG(); +BUG_ON(x); @@ identifier x; @@ -if (!x) BUG(); +BUG_ON(!x); // Signed-off-by: Julia Lawall Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_rd.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index 3dd81d2..e567e12 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -390,12 +390,10 @@ static int rd_MEMCPY_read(struct rd_request *req) length = req->rd_size; dst = sg_virt(&sg_d[i++]) + dst_offset; - if (!dst) - BUG(); + BUG_ON(!dst); src = sg_virt(&sg_s[j]) + src_offset; - if (!src) - BUG(); + BUG_ON(!src); dst_offset = 0; src_offset = length; @@ -415,8 +413,7 @@ static int rd_MEMCPY_read(struct rd_request *req) length = req->rd_size; dst = sg_virt(&sg_d[i]) + dst_offset; - if (!dst) - BUG(); + BUG_ON(!dst); if (sg_d[i].length == length) { i++; @@ -425,8 +422,7 @@ static int rd_MEMCPY_read(struct rd_request *req) dst_offset = length; src = sg_virt(&sg_s[j++]) + src_offset; - if (!src) - BUG(); + BUG_ON(!src); src_offset = 0; page_end = 1; @@ -510,12 +506,10 @@ static int rd_MEMCPY_write(struct rd_request *req) length = req->rd_size; src = sg_virt(&sg_s[i++]) + src_offset; - if (!src) - BUG(); + BUG_ON(!src); dst = sg_virt(&sg_d[j]) + dst_offset; - if (!dst) - BUG(); + BUG_ON(!dst); src_offset = 0; dst_offset = length; @@ -535,8 +529,7 @@ static int rd_MEMCPY_write(struct rd_request *req) length = req->rd_size; src = sg_virt(&sg_s[i]) + src_offset; - if (!src) - BUG(); + BUG_ON(!src); if (sg_s[i].length == length) { i++; @@ -545,8 +538,7 @@ static int rd_MEMCPY_write(struct rd_request *req) src_offset = length; dst = sg_virt(&sg_d[j++]) + dst_offset; - if (!dst) - BUG(); + BUG_ON(!dst); dst_offset = 0; page_end = 1; -- cgit v1.1 From c2337c709102b343bd917ae00c79b266fb15b871 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 8 Aug 2011 14:02:27 -0700 Subject: iscsi-target: remove duplicate return We returned on the line before already. Signed-off-by: Dan Carpenter Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index c24fb10..6a4ea29 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -2243,7 +2243,6 @@ static int iscsit_handle_snack( case 0: return iscsit_handle_recovery_datain_or_r2t(conn, buf, hdr->itt, hdr->ttt, hdr->begrun, hdr->runlength); - return 0; case ISCSI_FLAG_SNACK_TYPE_STATUS: return iscsit_handle_status_snack(conn, hdr->itt, hdr->ttt, hdr->begrun, hdr->runlength); -- cgit v1.1 From 387e96c05299ca7a0ade874f343f91f0b01086a0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 8 Aug 2011 14:06:44 -0700 Subject: iscsi-target: forever loop bug in iscsit_attach_ooo_cmdsn() This patch fixes a forever loop bug in iscsit_attach_ooo_cmdsn() while walking sess->sess_ooo_cmdsn_list when the received CmdSN is less than the tail of the list. Signed-off-by: Dan Carpenter Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_erl1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c index 9806507..c4c68da 100644 --- a/drivers/target/iscsi/iscsi_target_erl1.c +++ b/drivers/target/iscsi/iscsi_target_erl1.c @@ -834,7 +834,7 @@ static int iscsit_attach_ooo_cmdsn( */ list_for_each_entry(ooo_tmp, &sess->sess_ooo_cmdsn_list, ooo_list) { - while (ooo_tmp->cmdsn < ooo_cmdsn->cmdsn) + if (ooo_tmp->cmdsn < ooo_cmdsn->cmdsn) continue; list_add(&ooo_cmdsn->ooo_list, -- cgit v1.1 From 16ab8e60a0ebc22cfbe61d84e620457a15f3a0bc Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 8 Aug 2011 19:03:38 -0700 Subject: target: Fix write payload exception handling with ->new_cmd_map This patch fixes a bug for fabrics using tfo->new_cmd_map() that are expect transport_generic_request_failure() to be calling transport_send_check_condition_and_sense() for both READ and WRITE, instead of only for READ exceptions. This was originally observed with a failed WRITE_SAME_16 w/ unmap=0 using tcm_loop. Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index cc5a339..fd7d451 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2053,8 +2053,14 @@ static void transport_generic_request_failure( cmd->scsi_sense_reason = TCM_UNSUPPORTED_SCSI_OPCODE; break; } - - if (!sc) + /* + * If a fabric does not define a cmd->se_tfo->new_cmd_map caller, + * make the call to transport_send_check_condition_and_sense() + * directly. Otherwise expect the fabric to make the call to + * transport_send_check_condition_and_sense() after handling + * possible unsoliticied write data payloads. + */ + if (!sc && !cmd->se_tfo->new_cmd_map) transport_new_cmd_failure(cmd); else { ret = transport_send_check_condition_and_sense(cmd, -- cgit v1.1 From 706d5860969b3b24d65d9a57bd3bb5e4a1419c08 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 28 Jul 2011 00:07:03 -0700 Subject: target: Add WRITE_SAME (10) parsing and refactor passthrough checks This patch adds initial WRITE_SAME (10) w/ UNMAP=1 support following updates in sbcr26 to allow UNMAP=1 for the non 16 + 32 byte CDB case. It also refactors current pSCSI passthrough passthrough checks into target_check_write_same_discard() ahead of UNMAP=0 w/ write payload support into target_core_iblock.c. This includes the support for handling WRITE_SAME in transport_emulate_control_cdb(), and converts target_emulate_write_same to accept num_blocks directly for WRITE_SAME, WRITE_SAME_16 and WRITE_SAME_32. Reported-by: Eric Seppanen Cc: Roland Dreier Cc: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_cdb.c | 28 +++++---- drivers/target/target_core_transport.c | 102 ++++++++++++++++++--------------- 2 files changed, 73 insertions(+), 57 deletions(-) diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c index d095408..4c1d3a9 100644 --- a/drivers/target/target_core_cdb.c +++ b/drivers/target/target_core_cdb.c @@ -1090,24 +1090,17 @@ err: * Note this is not used for TCM/pSCSI passthrough */ static int -target_emulate_write_same(struct se_task *task, int write_same32) +target_emulate_write_same(struct se_task *task, u32 num_blocks) { struct se_cmd *cmd = task->task_se_cmd; struct se_device *dev = cmd->se_dev; sector_t range; sector_t lba = cmd->t_task_lba; - unsigned int num_blocks; int ret; /* - * Extract num_blocks from the WRITE_SAME_* CDB. Then use the explict - * range when non zero is supplied, otherwise calculate the remaining - * range based on ->get_blocks() - starting LBA. + * Use the explicit range when non zero is supplied, otherwise calculate + * the remaining range based on ->get_blocks() - starting LBA. */ - if (write_same32) - num_blocks = get_unaligned_be32(&cmd->t_task_cdb[28]); - else - num_blocks = get_unaligned_be32(&cmd->t_task_cdb[10]); - if (num_blocks != 0) range = num_blocks; else @@ -1170,13 +1163,23 @@ transport_emulate_control_cdb(struct se_task *task) } ret = target_emulate_unmap(task); break; + case WRITE_SAME: + if (!dev->transport->do_discard) { + pr_err("WRITE_SAME emulation not supported" + " for: %s\n", dev->transport->name); + return PYX_TRANSPORT_UNKNOWN_SAM_OPCODE; + } + ret = target_emulate_write_same(task, + get_unaligned_be16(&cmd->t_task_cdb[7])); + break; case WRITE_SAME_16: if (!dev->transport->do_discard) { pr_err("WRITE_SAME_16 emulation not supported" " for: %s\n", dev->transport->name); return PYX_TRANSPORT_UNKNOWN_SAM_OPCODE; } - ret = target_emulate_write_same(task, 0); + ret = target_emulate_write_same(task, + get_unaligned_be32(&cmd->t_task_cdb[10])); break; case VARIABLE_LENGTH_CMD: service_action = @@ -1189,7 +1192,8 @@ transport_emulate_control_cdb(struct se_task *task) dev->transport->name); return PYX_TRANSPORT_UNKNOWN_SAM_OPCODE; } - ret = target_emulate_write_same(task, 1); + ret = target_emulate_write_same(task, + get_unaligned_be32(&cmd->t_task_cdb[28])); break; default: pr_err("Unsupported VARIABLE_LENGTH_CMD SA:" diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index fd7d451..eb8055a 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2861,6 +2861,38 @@ static int transport_cmd_get_valid_sectors(struct se_cmd *cmd) return sectors; } +static int target_check_write_same_discard(unsigned char *flags, struct se_device *dev) +{ + /* + * Determine if the received WRITE_SAME is used to for direct + * passthrough into Linux/SCSI with struct request via TCM/pSCSI + * or we are signaling the use of internal WRITE_SAME + UNMAP=1 + * emulation for -> Linux/BLOCK disbard with TCM/IBLOCK code. + */ + int passthrough = (dev->transport->transport_type == + TRANSPORT_PLUGIN_PHBA_PDEV); + + if (!passthrough) { + if ((flags[0] & 0x04) || (flags[0] & 0x02)) { + pr_err("WRITE_SAME PBDATA and LBDATA" + " bits not supported for Block Discard" + " Emulation\n"); + return -ENOSYS; + } + /* + * Currently for the emulated case we only accept + * tpws with the UNMAP=1 bit set. + */ + if (!(flags[0] & 0x08)) { + pr_err("WRITE_SAME w/o UNMAP bit not" + " supported for Block Discard Emulation\n"); + return -ENOSYS; + } + } + + return 0; +} + /* transport_generic_cmd_sequencer(): * * Generic Command Sequencer that should work for most DAS transport @@ -3081,27 +3113,9 @@ static int transport_generic_cmd_sequencer( cmd->t_task_lba = get_unaligned_be64(&cdb[12]); cmd->se_cmd_flags |= SCF_SCSI_CONTROL_SG_IO_CDB; - /* - * Skip the remaining assignments for TCM/PSCSI passthrough - */ - if (passthrough) - break; - - if ((cdb[10] & 0x04) || (cdb[10] & 0x02)) { - pr_err("WRITE_SAME PBDATA and LBDATA" - " bits not supported for Block Discard" - " Emulation\n"); + if (target_check_write_same_discard(&cdb[10], dev) < 0) goto out_invalid_cdb_field; - } - /* - * Currently for the emulated case we only accept - * tpws with the UNMAP=1 bit set. - */ - if (!(cdb[10] & 0x08)) { - pr_err("WRITE_SAME w/o UNMAP bit not" - " supported for Block Discard Emulation\n"); - goto out_invalid_cdb_field; - } + break; default: pr_err("VARIABLE_LENGTH_CMD service action" @@ -3358,33 +3372,31 @@ static int transport_generic_cmd_sequencer( } cmd->t_task_lba = get_unaligned_be64(&cdb[2]); - passthrough = (dev->transport->transport_type == - TRANSPORT_PLUGIN_PHBA_PDEV); - /* - * Determine if the received WRITE_SAME_16 is used to for direct - * passthrough into Linux/SCSI with struct request via TCM/pSCSI - * or we are signaling the use of internal WRITE_SAME + UNMAP=1 - * emulation for -> Linux/BLOCK disbard with TCM/IBLOCK and - * TCM/FILEIO subsystem plugin backstores. - */ - if (!passthrough) { - if ((cdb[1] & 0x04) || (cdb[1] & 0x02)) { - pr_err("WRITE_SAME PBDATA and LBDATA" - " bits not supported for Block Discard" - " Emulation\n"); - goto out_invalid_cdb_field; - } - /* - * Currently for the emulated case we only accept - * tpws with the UNMAP=1 bit set. - */ - if (!(cdb[1] & 0x08)) { - pr_err("WRITE_SAME w/o UNMAP bit not " - " supported for Block Discard Emulation\n"); - goto out_invalid_cdb_field; - } + cmd->se_cmd_flags |= SCF_SCSI_CONTROL_SG_IO_CDB; + + if (target_check_write_same_discard(&cdb[1], dev) < 0) + goto out_invalid_cdb_field; + break; + case WRITE_SAME: + sectors = transport_get_sectors_10(cdb, cmd, §or_ret); + if (sector_ret) + goto out_unsupported_cdb; + + if (sectors) + size = transport_get_size(sectors, cdb, cmd); + else { + pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not supported\n"); + goto out_invalid_cdb_field; } + + cmd->t_task_lba = get_unaligned_be32(&cdb[2]); cmd->se_cmd_flags |= SCF_SCSI_CONTROL_SG_IO_CDB; + /* + * Follow sbcr26 with WRITE_SAME (10) and check for the existence + * of byte 1 bit 3 UNMAP instead of original reserved field + */ + if (target_check_write_same_discard(&cdb[1], dev) < 0) + goto out_invalid_cdb_field; break; case ALLOW_MEDIUM_REMOVAL: case GPCMD_CLOSE_TRACK: -- cgit v1.1 From 12850626e2717f866a94e6ced724e3efe5a0aab8 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 8 Aug 2011 19:08:23 -0700 Subject: target: Fix WRITE_SAME usage with transport_get_size For all flavours of WRITE_SAME, we only expect to handle a single block of data-out buffer payload, regardless of the number of logical blocks presented in the CDB. This patch changes all flavours of WRITE_SAME in transport_generic_cmd_sequencer() to pass '1' into transport_get_size() instead of the extracted 'sectors' to properly handle the default usage of sg_write_same without the --xferlen parameter. Reported-by: Eric Seppanen Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index eb8055a..d35c2cc 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -3103,7 +3103,7 @@ static int transport_generic_cmd_sequencer( goto out_unsupported_cdb; if (sectors) - size = transport_get_size(sectors, cdb, cmd); + size = transport_get_size(1, cdb, cmd); else { pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not" " supported\n"); @@ -3365,7 +3365,7 @@ static int transport_generic_cmd_sequencer( goto out_unsupported_cdb; if (sectors) - size = transport_get_size(sectors, cdb, cmd); + size = transport_get_size(1, cdb, cmd); else { pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not supported\n"); goto out_invalid_cdb_field; @@ -3383,7 +3383,7 @@ static int transport_generic_cmd_sequencer( goto out_unsupported_cdb; if (sectors) - size = transport_get_size(sectors, cdb, cmd); + size = transport_get_size(1, cdb, cmd); else { pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not supported\n"); goto out_invalid_cdb_field; -- cgit v1.1 From 72f4ba1e32a1e5da31dcf14ea4b8985ae88a8bdb Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 9 Aug 2011 22:53:02 -0700 Subject: target: Remove duplicate task completions in transport_emulate_control_cdb This patch removes a duplicate set of transport_complete_task() calls in target_emulate_unmap() and target_emulate_write_same() as the completion call is already done within transport_emulate_control_cdb() This patch also adds a check in transport_emulate_control_cdb() for the existing SCF_EMULATE_CDB_ASYNC flag currently used by SYNCHRONIZE_CACHE in order to handle IMMEDIATE processing. Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_cdb.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c index 4c1d3a9..40ad142 100644 --- a/drivers/target/target_core_cdb.c +++ b/drivers/target/target_core_cdb.c @@ -1077,8 +1077,6 @@ target_emulate_unmap(struct se_task *task) size -= 16; } - task->task_scsi_status = GOOD; - transport_complete_task(task, 1); err: transport_kunmap_first_data_page(cmd); @@ -1115,8 +1113,6 @@ target_emulate_write_same(struct se_task *task, u32 num_blocks) return ret; } - task->task_scsi_status = GOOD; - transport_complete_task(task, 1); return 0; } @@ -1228,8 +1224,14 @@ transport_emulate_control_cdb(struct se_task *task) if (ret < 0) return ret; - task->task_scsi_status = GOOD; - transport_complete_task(task, 1); + /* + * Handle the successful completion here unless a caller + * has explictly requested an asychronous completion. + */ + if (!(cmd->se_cmd_flags & SCF_EMULATE_CDB_ASYNC)) { + task->task_scsi_status = GOOD; + transport_complete_task(task, 1); + } return PYX_TRANSPORT_SENT_TO_TRANSPORT; } -- cgit v1.1 From 7abbe7f3e4243e28a9169ee1b8d76f10a6f5d37c Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 10 Aug 2011 18:41:14 -0700 Subject: target: Fix SYNCHRONIZE_CACHE zero LBA + range breakage This patch fixes a SYNCHRONIZE_CACHE CDB handling bug with IBLOCK/FILEIO backends where transport_cmd_get_valid_sectors() was incorrectly rejecting a zero LBA + range CDB from being processed, and returning CHECK_CONDITION. This includes changing transport_cmd_get_valid_sectors() to return '0' on success and '-EINVAL' on failure (this makes more sense than sectors), and to only check transport_cmd_get_valid_sectors() when a non zero LBA + range SYNCHRONIZE_CACHE operation has been receieved for the non passthrough case. Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index d35c2cc..d385c317 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2853,12 +2853,10 @@ static int transport_cmd_get_valid_sectors(struct se_cmd *cmd) " transport_dev_end_lba(): %llu\n", cmd->t_task_lba, sectors, transport_dev_end_lba(dev)); - pr_err(" We should return CHECK_CONDITION" - " but we don't yet\n"); - return 0; + return -EINVAL; } - return sectors; + return 0; } static int target_check_write_same_discard(unsigned char *flags, struct se_device *dev) @@ -3350,10 +3348,12 @@ static int transport_generic_cmd_sequencer( cmd->se_cmd_flags |= SCF_EMULATE_CDB_ASYNC; /* * Check to ensure that LBA + Range does not exceed past end of - * device. + * device for IBLOCK and FILEIO ->do_sync_cache() backend calls */ - if (!transport_cmd_get_valid_sectors(cmd)) - goto out_invalid_cdb_field; + if ((cmd->t_task_lba != 0) || (sectors != 0)) { + if (transport_cmd_get_valid_sectors(cmd) < 0) + goto out_invalid_cdb_field; + } break; case UNMAP: size = get_unaligned_be16(&cdb[7]); -- cgit v1.1 From 01cde4d54327884a0b61ce8666092f5703557d4b Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 10 Aug 2011 00:59:58 -0700 Subject: target: Add missing DATA_SG_IO transport_cmd_get_valid_sectors check This patch adds the missing transport_cmd_get_valid_sectors() check for SCF_SCSI_DATA_SG_IO_CDB type payloads to ensure that a received LBA + range does not exeed past the end of associated backend struct se_device. This patch also fixes a bug in the failure path of transport_new_cmd_obj() where this check can fail, so change to use a signed 'rc' and return '-EINVAL' to signal proper transport_generic_request_failure() handling. Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index d385c317..ab61c55 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -3891,9 +3891,7 @@ EXPORT_SYMBOL(transport_generic_map_mem_to_cmd); static int transport_new_cmd_obj(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; - u32 task_cdbs; - u32 rc; - int set_counts = 1; + int set_counts = 1, rc, task_cdbs; /* * Setup any BIDI READ tasks and memory from @@ -3911,7 +3909,7 @@ static int transport_new_cmd_obj(struct se_cmd *cmd) cmd->se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; cmd->scsi_sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - return PYX_TRANSPORT_LU_COMM_FAILURE; + return -EINVAL; } atomic_inc(&cmd->t_fe_count); atomic_inc(&cmd->t_se_count); @@ -3930,7 +3928,7 @@ static int transport_new_cmd_obj(struct se_cmd *cmd) cmd->se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; cmd->scsi_sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - return PYX_TRANSPORT_LU_COMM_FAILURE; + return -EINVAL; } if (set_counts) { @@ -4248,10 +4246,13 @@ static u32 transport_allocate_tasks( struct scatterlist *sgl, unsigned int sgl_nents) { - if (cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB) + if (cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB) { + if (transport_cmd_get_valid_sectors(cmd) < 0) + return -EINVAL; + return transport_allocate_data_tasks(cmd, lba, data_direction, sgl, sgl_nents); - else + } else return transport_allocate_control_task(cmd); } -- cgit v1.1 From 525a48a21da259d00d6ebc5b60563b5bcf022c26 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sat, 13 Aug 2011 02:11:38 -0700 Subject: target: Fix task count > 1 handling breakage and use max_sector page alignment This patch addresses recent breakage with multiple se_task (task_count > 1) operation following backend dev->se_sub_dev->se_dev_attrib.max_sectors in new transport_allocate_data_tasks() code. The initial bug here was a bogus task->task_sg_nents assignment in transport_allocate_data_tasks() based on the passed parameter, which now uses DIV_ROUND_UP(task_size, PAGE_SIZE) to determine the proper number of per task SGL entries for the (task_count > 1) case. This also means we now need to enforce a PAGE_SIZE aligned max_sector count value for this to work as expected without bringing back the pre v3.1 transport_map_mem_to_sg() logic to handle SGL offsets across multiple tasks. So this patch adds se_dev_align_max_sectors() to round down max_sectors as necessary to ensure this alignment via se_dev_set_default_attribs() and se_dev_align_max_sectors() and keeps it simple for (task_count > 1) operation. So far this bugfix has been tested with (task_count > 1) operation using iscsi-target and iblock backends. Reported-by: Chris Boot Cc: Kiran Patil Cc: Andy Grover Cc: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 28 ++++++++++++++++++++++++++++ drivers/target/target_core_transport.c | 7 +++++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index ec3fbcd..4b5237f 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -839,6 +839,24 @@ int se_dev_check_shutdown(struct se_device *dev) return ret; } +u32 se_dev_align_max_sectors(u32 max_sectors, u32 block_size) +{ + u32 tmp, aligned_max_sectors; + /* + * Limit max_sectors to a PAGE_SIZE aligned value for modern + * transport_allocate_data_tasks() operation. + */ + tmp = rounddown((max_sectors * block_size), PAGE_SIZE); + aligned_max_sectors = (tmp / block_size); + if (max_sectors != aligned_max_sectors) { + printk(KERN_INFO "Rounding down aligned max_sectors from %u" + " to %u\n", max_sectors, aligned_max_sectors); + return aligned_max_sectors; + } + + return max_sectors; +} + void se_dev_set_default_attribs( struct se_device *dev, struct se_dev_limits *dev_limits) @@ -878,6 +896,11 @@ void se_dev_set_default_attribs( * max_sectors is based on subsystem plugin dependent requirements. */ dev->se_sub_dev->se_dev_attrib.hw_max_sectors = limits->max_hw_sectors; + /* + * Align max_sectors down to PAGE_SIZE to follow transport_allocate_data_tasks() + */ + limits->max_sectors = se_dev_align_max_sectors(limits->max_sectors, + limits->logical_block_size); dev->se_sub_dev->se_dev_attrib.max_sectors = limits->max_sectors; /* * Set optimal_sectors from max_sectors, which can be lowered via @@ -1242,6 +1265,11 @@ int se_dev_set_max_sectors(struct se_device *dev, u32 max_sectors) return -EINVAL; } } + /* + * Align max_sectors down to PAGE_SIZE to follow transport_allocate_data_tasks() + */ + max_sectors = se_dev_align_max_sectors(max_sectors, + dev->se_sub_dev->se_dev_attrib.block_size); dev->se_sub_dev->se_dev_attrib.max_sectors = max_sectors; pr_debug("dev[%p]: SE Device max_sectors changed to %u\n", diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index ab61c55..efac6a9 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -4126,7 +4126,11 @@ static int transport_allocate_data_tasks( /* Update new cdb with updated lba/sectors */ cmd->transport_split_cdb(task->task_lba, task->task_sectors, cdb); - + /* + * This now assumes that passed sg_ents are in PAGE_SIZE chunks + * in order to calculate the number per task SGL entries + */ + task->task_sg_nents = DIV_ROUND_UP(task->task_size, PAGE_SIZE); /* * Check if the fabric module driver is requesting that all * struct se_task->task_sg[] be chained together.. If so, @@ -4136,7 +4140,6 @@ static int transport_allocate_data_tasks( * It's so much easier and only a waste when task_count > 1. * That is extremely rare. */ - task->task_sg_nents = sgl_nents; if (cmd->se_tfo->task_sg_chaining) { task->task_sg_nents++; task->task_padded_sg = 1; -- cgit v1.1 From c3c74c7a33d837be391ab61aaae39bb21f16736a Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sat, 13 Aug 2011 05:30:31 -0700 Subject: target: Fix task SGL chaining breakage with transport_allocate_data_tasks This patch fixes two bugs associated with transport_do_task_sg_chain() operation where transport_allocate_data_tasks() was incorrectly setting task_padded_sg for all tasks, and causing bogus task->task_sg_nents assignments + OOPsen with fabrics depending upon this code. The first bit here adds a task_sg_nents_padded check in transport_allocate_data_tasks() to include an extra SGL vector when necessary for tasks that expect to be linked using sg_chain(). The second change involves making transport_do_task_sg_chain() properly account for the extra SGL vector when task->task_padded_sg is set for the non trailing ->task_sg or single ->task_sg allocations. Note this patch also removes the BUG_ON(!task->task_padded_sg) check within transport_do_task_sg_chain() as we expect this to happen normally with the updated logic in transport_allocate_data_tasks(), along with being bogus for CONTROL_SG_IO_CDB type payloads. So far this bugfix has been tested with tcm_qla2xxx and iblock backends in (task_count > 1)( and (task_count == 1) operation. Reported-by: Kiran Patil Cc: Kiran Patil Cc: Andy Grover Cc: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index efac6a9..9cc49d1 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -4044,8 +4044,6 @@ void transport_do_task_sg_chain(struct se_cmd *cmd) if (!task->task_sg) continue; - BUG_ON(!task->task_padded_sg); - if (!sg_first) { sg_first = task->task_sg; chained_nents = task->task_sg_nents; @@ -4053,9 +4051,19 @@ void transport_do_task_sg_chain(struct se_cmd *cmd) sg_chain(sg_prev, sg_prev_nents, task->task_sg); chained_nents += task->task_sg_nents; } + /* + * For the padded tasks, use the extra SGL vector allocated + * in transport_allocate_data_tasks() for the sg_prev_nents + * offset into sg_chain() above.. The last task of a + * multi-task list, or a single task will not have + * task->task_sg_padded set.. + */ + if (task->task_padded_sg) + sg_prev_nents = (task->task_sg_nents + 1); + else + sg_prev_nents = task->task_sg_nents; sg_prev = task->task_sg; - sg_prev_nents = task->task_sg_nents; } /* * Setup the starting pointer and total t_tasks_sg_linked_no including @@ -4107,7 +4115,7 @@ static int transport_allocate_data_tasks( cmd_sg = sgl; for (i = 0; i < task_count; i++) { - unsigned int task_size; + unsigned int task_size, task_sg_nents_padded; int count; task = transport_generic_get_task(cmd, data_direction); @@ -4135,24 +4143,24 @@ static int transport_allocate_data_tasks( * Check if the fabric module driver is requesting that all * struct se_task->task_sg[] be chained together.. If so, * then allocate an extra padding SG entry for linking and - * marking the end of the chained SGL. - * Possibly over-allocate task sgl size by using cmd sgl size. - * It's so much easier and only a waste when task_count > 1. - * That is extremely rare. + * marking the end of the chained SGL for every task except + * the last one for (task_count > 1) operation, or skipping + * the extra padding for the (task_count == 1) case. */ - if (cmd->se_tfo->task_sg_chaining) { - task->task_sg_nents++; + if (cmd->se_tfo->task_sg_chaining && (i < (task_count - 1))) { + task_sg_nents_padded = (task->task_sg_nents + 1); task->task_padded_sg = 1; - } + } else + task_sg_nents_padded = task->task_sg_nents; task->task_sg = kmalloc(sizeof(struct scatterlist) * - task->task_sg_nents, GFP_KERNEL); + task_sg_nents_padded, GFP_KERNEL); if (!task->task_sg) { cmd->se_dev->transport->free_task(task); return -ENOMEM; } - sg_init_table(task->task_sg, task->task_sg_nents); + sg_init_table(task->task_sg, task_sg_nents_padded); task_size = task->task_size; -- cgit v1.1 From 6626a0572657a0945a7b9ccf4a6d6ad1750f9adc Mon Sep 17 00:00:00 2001 From: Chris Boot Date: Sat, 13 Aug 2011 22:10:46 -0700 Subject: iscsi-target: Implement iSCSI target IPv6 address printing. The iSCSI target configfs code to print out an initiator's IPv6 address is not fully implemented. This patch uses snprintf() with the "%pI6c" format string to format the IPv6 address for display purposes. Signed-off-by: Chris Boot Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_login.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index bcaf82f..daad362 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1013,19 +1013,9 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) ISCSI_LOGIN_STATUS_TARGET_ERROR); goto new_sess_out; } -#if 0 - if (!iscsi_ntop6((const unsigned char *) - &sock_in6.sin6_addr.in6_u, - (char *)&conn->ipv6_login_ip[0], - IPV6_ADDRESS_SPACE)) { - pr_err("iscsi_ntop6() failed\n"); - iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, - ISCSI_LOGIN_STATUS_TARGET_ERROR); - goto new_sess_out; - } -#else - pr_debug("Skipping iscsi_ntop6()\n"); -#endif + snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI6c", + &sock_in6.sin6_addr.in6_u); + conn->login_port = ntohs(sock_in6.sin6_port); } else { memset(&sock_in, 0, sizeof(struct sockaddr_in)); -- cgit v1.1 From ba7736696341ad4253125055c0c85aa9f42959a0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 13 Aug 2011 22:35:00 -0700 Subject: iscsi-target: Fix iscsit_allocate_se_cmd_for_tmr failure path bugs This patch fixes two bugs in allocation failure handling in iscsit_allocate_se_cmd_for_tmr(): This first reported by DanC is a free-after call to transport_free_se_cmd(), this patch drops the transport_free_se_cmd() call all together, as iscsit_release_cmd() will release existing allocations as expected. The second is a bug where iscsi_cmd_t was being leaked on a cmd->tmr_req allocation failure, so make this jump to iscsit_release_cmd() as well. Signed-off-by: Dan Carpenter Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_util.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index a1acb01..a0d23bc 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -243,7 +243,7 @@ struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr( if (!cmd->tmr_req) { pr_err("Unable to allocate memory for" " Task Management command!\n"); - return NULL; + goto out; } /* * TASK_REASSIGN for ERL=2 / connection stays inside of @@ -298,8 +298,6 @@ struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr( return cmd; out: iscsit_release_cmd(cmd); - if (se_cmd) - transport_free_se_cmd(se_cmd); return NULL; } -- cgit v1.1 From f15ea5780d08e4c96930c0d607d05e480ec588c8 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 12 Aug 2011 10:01:24 -0700 Subject: target: Print subpage too for unhandled MODE SENSE pages Make a log message more useful by printing both the page and subpage that an initiator is requesting. Signed-off-by: Roland Dreier Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_cdb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c index 40ad142..89ae923 100644 --- a/drivers/target/target_core_cdb.c +++ b/drivers/target/target_core_cdb.c @@ -920,8 +920,8 @@ target_emulate_modesense(struct se_cmd *cmd, int ten) length += target_modesense_control(dev, &buf[offset+length]); break; default: - pr_err("Got Unknown Mode Page: 0x%02x\n", - cdb[2] & 0x3f); + pr_err("MODE SENSE: unimplemented page/subpage: 0x%02x/0x%02x\n", + cdb[2] & 0x3f, cdb[3]); return PYX_TRANSPORT_UNKNOWN_MODE_PAGE; } offset += length; -- cgit v1.1 From 4e0f05297ff615a9a4e269da301ff77f660a3ab0 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 12 Aug 2011 10:16:52 -0700 Subject: tcm_fc: init/exit functions should not be protected by "#ifdef MODULE" There's no need for the #ifdef protection when building into the kernel, and in fact we need the module_init() for the initialization function to be called. Signed-off-by: Roland Dreier Signed-off-by: Nicholas Bellinger --- drivers/target/tcm_fc/tfc_conf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index 8781d1e..520a8ba 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -655,9 +655,7 @@ static void __exit ft_exit(void) synchronize_rcu(); } -#ifdef MODULE MODULE_DESCRIPTION("FC TCM fabric driver " FT_VERSION); MODULE_LICENSE("GPL"); module_init(ft_init); module_exit(ft_exit); -#endif /* MODULE */ -- cgit v1.1 From e63a8e1933a2218cf801e46dd01bd8cca4a555ec Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 12 Aug 2011 16:01:02 -0700 Subject: target: Make locking in transport_deregister_session() IRQ safe At least the tcm_qla2xxx fabric driver calls into transport_deregister_session() while holding an IRQ-disabled spinlock, so the inner locking needs to use spin_lock_irqsave() instead of spin_lock_bh(). This fixes warnings seen with tcm_qla2xxx like: WARNING: at kernel/softirq.c:159 local_bh_enable_ip+0x98/0xb0() Call Trace: [] warn_slowpath_common+0x7f/0xc0 [] warn_slowpath_null+0x1a/0x20 [] local_bh_enable_ip+0x98/0xb0 [] _raw_spin_unlock_bh+0x14/0x20 [] transport_deregister_session+0x96/0x180 [target_core_mod] [] tcm_qla2xxx_free_session+0xd1/0x170 [tcm_qla2xxx] [] qla_tgt_sess_put+0xc3/0x140 [qla2xxx] [] qla_tgt_stop_phase1+0x8f/0x2c0 [qla2xxx] [] tcm_qla2xxx_tpg_store_enable+0x6e/0xd0 [tcm_qla2xxx] [] target_fabric_tpg_attr_store+0x39/0x40 [target_core_mod] [] configfs_write_file+0xbd/0x120 [configfs] [] vfs_write+0xc6/0x180 [] sys_write+0x51/0x90 [] system_call_fastpath+0x16/0x1b Signed-off-by: Roland Dreier Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 9cc49d1..8d0c58e 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -389,17 +389,18 @@ void transport_deregister_session(struct se_session *se_sess) { struct se_portal_group *se_tpg = se_sess->se_tpg; struct se_node_acl *se_nacl; + unsigned long flags; if (!se_tpg) { transport_free_session(se_sess); return; } - spin_lock_bh(&se_tpg->session_lock); + spin_lock_irqsave(&se_tpg->session_lock, flags); list_del(&se_sess->sess_list); se_sess->se_tpg = NULL; se_sess->fabric_sess_ptr = NULL; - spin_unlock_bh(&se_tpg->session_lock); + spin_unlock_irqrestore(&se_tpg->session_lock, flags); /* * Determine if we need to do extra work for this initiator node's @@ -407,22 +408,22 @@ void transport_deregister_session(struct se_session *se_sess) */ se_nacl = se_sess->se_node_acl; if (se_nacl) { - spin_lock_bh(&se_tpg->acl_node_lock); + spin_lock_irqsave(&se_tpg->acl_node_lock, flags); if (se_nacl->dynamic_node_acl) { if (!se_tpg->se_tpg_tfo->tpg_check_demo_mode_cache( se_tpg)) { list_del(&se_nacl->acl_list); se_tpg->num_node_acls--; - spin_unlock_bh(&se_tpg->acl_node_lock); + spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags); core_tpg_wait_for_nacl_pr_ref(se_nacl); core_free_device_list_for_node(se_nacl, se_tpg); se_tpg->se_tpg_tfo->tpg_release_fabric_acl(se_tpg, se_nacl); - spin_lock_bh(&se_tpg->acl_node_lock); + spin_lock_irqsave(&se_tpg->acl_node_lock, flags); } } - spin_unlock_bh(&se_tpg->acl_node_lock); + spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags); } transport_free_session(se_sess); -- cgit v1.1 From 28638887f351d11867562322b7abaa014dd5528a Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 16 Aug 2011 09:40:01 -0700 Subject: target: Convert acl_node_lock to be IRQ-disabling With qla2xxx, acl_node_lock is taken inside qla2xxx's hardware_lock, which is taken in hardirq context. This means acl_node_lock must become an IRQ-disabling lock; in particular this fixes lockdep warnings along the lines of ====================================================== [ INFO: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected ] (&(&se_tpg->acl_node_lock)->rlock){+.....}, at: [] transport_deregister_session+0x92/0x140 [target_core_mod] and this task is already holding: (&(&ha->hardware_lock)->rlock){-.-...}, at: [] qla_tgt_stop_phase1+0x57/0x2c0 [qla2xxx] which would create a new lock dependency: (&(&ha->hardware_lock)->rlock){-.-...} -> (&(&se_tpg->acl_node_lock)->rlock){+.....} but this new dependency connects a HARDIRQ-irq-safe lock: (&(&ha->hardware_lock)->rlock){-.-...} to a HARDIRQ-irq-unsafe lock: (&(&se_tpg->acl_node_lock)->rlock){+.....} Signed-off-by: Roland Dreier Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 16 ++++++------ drivers/target/target_core_pr.c | 8 +++--- drivers/target/target_core_tpg.c | 52 ++++++++++++++++++------------------- drivers/target/tcm_fc/tfc_conf.c | 4 +-- 4 files changed, 40 insertions(+), 40 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 4b5237f..ca6e4a4 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -472,9 +472,9 @@ void core_clear_lun_from_tpg(struct se_lun *lun, struct se_portal_group *tpg) struct se_dev_entry *deve; u32 i; - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); list_for_each_entry(nacl, &tpg->acl_node_list, acl_list) { - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); spin_lock_irq(&nacl->device_list_lock); for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { @@ -491,9 +491,9 @@ void core_clear_lun_from_tpg(struct se_lun *lun, struct se_portal_group *tpg) } spin_unlock_irq(&nacl->device_list_lock); - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); } - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); } static struct se_port *core_alloc_port(struct se_device *dev) @@ -1372,17 +1372,17 @@ struct se_lun *core_dev_add_lun( */ if (tpg->se_tpg_tfo->tpg_check_demo_mode(tpg)) { struct se_node_acl *acl; - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); list_for_each_entry(acl, &tpg->acl_node_list, acl_list) { if (acl->dynamic_node_acl && (!tpg->se_tpg_tfo->tpg_check_demo_mode_login_only || !tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg))) { - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); core_tpg_add_node_to_devs(acl, tpg); - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); } } - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); } return lun_p; diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 1c1b849..7fd3a16 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -1598,14 +1598,14 @@ static int core_scsi3_decode_spec_i_port( * from the decoded fabric module specific TransportID * at *i_str. */ - spin_lock_bh(&tmp_tpg->acl_node_lock); + spin_lock_irq(&tmp_tpg->acl_node_lock); dest_node_acl = __core_tpg_get_initiator_node_acl( tmp_tpg, i_str); if (dest_node_acl) { atomic_inc(&dest_node_acl->acl_pr_ref_count); smp_mb__after_atomic_inc(); } - spin_unlock_bh(&tmp_tpg->acl_node_lock); + spin_unlock_irq(&tmp_tpg->acl_node_lock); if (!dest_node_acl) { core_scsi3_tpg_undepend_item(tmp_tpg); @@ -3496,14 +3496,14 @@ after_iport_check: /* * Locate the destination struct se_node_acl from the received Transport ID */ - spin_lock_bh(&dest_se_tpg->acl_node_lock); + spin_lock_irq(&dest_se_tpg->acl_node_lock); dest_node_acl = __core_tpg_get_initiator_node_acl(dest_se_tpg, initiator_str); if (dest_node_acl) { atomic_inc(&dest_node_acl->acl_pr_ref_count); smp_mb__after_atomic_inc(); } - spin_unlock_bh(&dest_se_tpg->acl_node_lock); + spin_unlock_irq(&dest_se_tpg->acl_node_lock); if (!dest_node_acl) { pr_err("Unable to locate %s dest_node_acl for" diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 718ccd1..162b736 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -137,15 +137,15 @@ struct se_node_acl *core_tpg_get_initiator_node_acl( { struct se_node_acl *acl; - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); list_for_each_entry(acl, &tpg->acl_node_list, acl_list) { if (!strcmp(acl->initiatorname, initiatorname) && !acl->dynamic_node_acl) { - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); return acl; } } - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); return NULL; } @@ -309,10 +309,10 @@ struct se_node_acl *core_tpg_check_initiator_node_acl( else core_tpg_add_node_to_devs(acl, tpg); - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); list_add_tail(&acl->acl_list, &tpg->acl_node_list); tpg->num_node_acls++; - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); pr_debug("%s_TPG[%u] - Added DYNAMIC ACL with TCQ Depth: %d for %s" " Initiator Node: %s\n", tpg->se_tpg_tfo->get_fabric_name(), @@ -362,7 +362,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( { struct se_node_acl *acl = NULL; - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname); if (acl) { if (acl->dynamic_node_acl) { @@ -370,7 +370,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( pr_debug("%s_TPG[%u] - Replacing dynamic ACL" " for %s\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), initiatorname); - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); /* * Release the locally allocated struct se_node_acl * because * core_tpg_add_initiator_node_acl() returned @@ -386,10 +386,10 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( " Node %s already exists for TPG %u, ignoring" " request.\n", tpg->se_tpg_tfo->get_fabric_name(), initiatorname, tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); return ERR_PTR(-EEXIST); } - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); if (!se_nacl) { pr_err("struct se_node_acl pointer is NULL\n"); @@ -426,10 +426,10 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( return ERR_PTR(-EINVAL); } - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); list_add_tail(&acl->acl_list, &tpg->acl_node_list); tpg->num_node_acls++; - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); done: pr_debug("%s_TPG[%hu] - Added ACL with TCQ Depth: %d for %s" @@ -453,14 +453,14 @@ int core_tpg_del_initiator_node_acl( struct se_session *sess, *sess_tmp; int dynamic_acl = 0; - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); if (acl->dynamic_node_acl) { acl->dynamic_node_acl = 0; dynamic_acl = 1; } list_del(&acl->acl_list); tpg->num_node_acls--; - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); spin_lock_bh(&tpg->session_lock); list_for_each_entry_safe(sess, sess_tmp, @@ -511,21 +511,21 @@ int core_tpg_set_initiator_node_queue_depth( struct se_node_acl *acl; int dynamic_acl = 0; - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname); if (!acl) { pr_err("Access Control List entry for %s Initiator" " Node %s does not exists for TPG %hu, ignoring" " request.\n", tpg->se_tpg_tfo->get_fabric_name(), initiatorname, tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); return -ENODEV; } if (acl->dynamic_node_acl) { acl->dynamic_node_acl = 0; dynamic_acl = 1; } - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); spin_lock_bh(&tpg->session_lock); list_for_each_entry(sess, &tpg->tpg_sess_list, sess_list) { @@ -541,10 +541,10 @@ int core_tpg_set_initiator_node_queue_depth( tpg->se_tpg_tfo->get_fabric_name(), initiatorname); spin_unlock_bh(&tpg->session_lock); - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); if (dynamic_acl) acl->dynamic_node_acl = 1; - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); return -EEXIST; } /* @@ -579,10 +579,10 @@ int core_tpg_set_initiator_node_queue_depth( if (init_sess) tpg->se_tpg_tfo->close_session(init_sess); - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); if (dynamic_acl) acl->dynamic_node_acl = 1; - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); return -EINVAL; } spin_unlock_bh(&tpg->session_lock); @@ -598,10 +598,10 @@ int core_tpg_set_initiator_node_queue_depth( initiatorname, tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); if (dynamic_acl) acl->dynamic_node_acl = 1; - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); return 0; } @@ -725,20 +725,20 @@ int core_tpg_deregister(struct se_portal_group *se_tpg) * not been released because of TFO->tpg_check_demo_mode_cache() == 1 * in transport_deregister_session(). */ - spin_lock_bh(&se_tpg->acl_node_lock); + spin_lock_irq(&se_tpg->acl_node_lock); list_for_each_entry_safe(nacl, nacl_tmp, &se_tpg->acl_node_list, acl_list) { list_del(&nacl->acl_list); se_tpg->num_node_acls--; - spin_unlock_bh(&se_tpg->acl_node_lock); + spin_unlock_irq(&se_tpg->acl_node_lock); core_tpg_wait_for_nacl_pr_ref(nacl); core_free_device_list_for_node(nacl, se_tpg); se_tpg->se_tpg_tfo->tpg_release_fabric_acl(se_tpg, nacl); - spin_lock_bh(&se_tpg->acl_node_lock); + spin_lock_irq(&se_tpg->acl_node_lock); } - spin_unlock_bh(&se_tpg->acl_node_lock); + spin_unlock_irq(&se_tpg->acl_node_lock); if (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL) core_tpg_release_virtual_lun0(se_tpg); diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index 520a8ba..b15879d 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -256,7 +256,7 @@ struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata) struct se_portal_group *se_tpg = &tpg->se_tpg; struct se_node_acl *se_acl; - spin_lock_bh(&se_tpg->acl_node_lock); + spin_lock_irq(&se_tpg->acl_node_lock); list_for_each_entry(se_acl, &se_tpg->acl_node_list, acl_list) { acl = container_of(se_acl, struct ft_node_acl, se_node_acl); pr_debug("acl %p port_name %llx\n", @@ -270,7 +270,7 @@ struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata) break; } } - spin_unlock_bh(&se_tpg->acl_node_lock); + spin_unlock_irq(&se_tpg->acl_node_lock); return found; } -- cgit v1.1 From 259a187ade45056fd44856654f78aa9e9f0f7c75 Mon Sep 17 00:00:00 2001 From: Noah Watkins Date: Mon, 22 Aug 2011 13:49:41 -0600 Subject: ceph: fix memory leak kfree does not clean up indirect allocations in ceph_fs_client and ceph_options (e.g. snapdir_name). Signed-off-by: Noah Watkins Signed-off-by: Sage Weil --- fs/ceph/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index d47c5ec..88bacaf 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -813,8 +813,8 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type, fsc = create_fs_client(fsopt, opt); if (IS_ERR(fsc)) { res = ERR_CAST(fsc); - kfree(fsopt); - kfree(opt); + destroy_mount_options(fsopt); + ceph_destroy_options(opt); goto out_final; } -- cgit v1.1 From 0e69d75ccb2f091757b38d4d6a2ed739e06b615e Mon Sep 17 00:00:00 2001 From: Andrew Bird Date: Tue, 16 Aug 2011 13:57:14 -0600 Subject: USB option driver add PID of Huawei Vodafone K3806 This patch adds the product ID of Huawei's Vodafone K3806 mobile broadband modem to option.c. This is necessary so that the driver gets loaded on demand without the intervention of usb_modeswitch. This has the benefit of it becoming available faster and also ensures that the option driver is not bound to a network interface that should be claimed by cdc_ether. Signed-off-by: Andrew Bird Signed-off-by: Alex Chiang Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 8156561..6e04222 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -148,6 +148,7 @@ static void option_instat_callback(struct urb *urb); #define HUAWEI_PRODUCT_K4505 0x1464 #define HUAWEI_PRODUCT_K3765 0x1465 #define HUAWEI_PRODUCT_E14AC 0x14AC +#define HUAWEI_PRODUCT_K3806 0x14AE #define HUAWEI_PRODUCT_K3770 0x14C9 #define HUAWEI_PRODUCT_K3771 0x14CA #define HUAWEI_PRODUCT_K4510 0x14CB @@ -551,6 +552,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3765, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_ETS1220, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E14AC, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3806, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x31) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x32) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3771, 0xff, 0x02, 0x31) }, -- cgit v1.1 From 7e1805844da18a37e6d251d286f93c94b52d791e Mon Sep 17 00:00:00 2001 From: Andrew Bird Date: Tue, 16 Aug 2011 13:58:21 -0600 Subject: USB option driver add PID of Huawei Vodafone K4605 This patch adds the product ID of Huawei's Vodafone K4605 mobile broadband modem to option.c. This is necessary so that the driver gets loaded on demand without the intervention of usb_modeswitch. This has the benefit of it becoming available faster and also ensures that the option driver is not bound to a network interface that should be claimed by suitable network driver. Signed-off-by: Andrew Bird Signed-off-by: Alex Chiang Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 6e04222..ab86e0d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -149,6 +149,7 @@ static void option_instat_callback(struct urb *urb); #define HUAWEI_PRODUCT_K3765 0x1465 #define HUAWEI_PRODUCT_E14AC 0x14AC #define HUAWEI_PRODUCT_K3806 0x14AE +#define HUAWEI_PRODUCT_K4605 0x14C6 #define HUAWEI_PRODUCT_K3770 0x14C9 #define HUAWEI_PRODUCT_K3771 0x14CA #define HUAWEI_PRODUCT_K4510 0x14CB @@ -553,6 +554,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_ETS1220, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E14AC, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3806, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4605, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x31) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x32) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3771, 0xff, 0x02, 0x31) }, @@ -1136,10 +1138,11 @@ static int option_probe(struct usb_serial *serial, serial->interface->cur_altsetting->desc.bInterfaceClass != 0xff) return -ENODEV; - /* Don't bind network interfaces on Huawei K3765 & K4505 */ + /* Don't bind network interfaces on Huawei K3765, K4505 & K4605 */ if (serial->dev->descriptor.idVendor == HUAWEI_VENDOR_ID && (serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K3765 || - serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4505) && + serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4505 || + serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4605) && serial->interface->cur_altsetting->desc.bInterfaceNumber == 1) return -ENODEV; -- cgit v1.1 From d0f2fb2500b1c5fe4967eb45d8c9bc758d7aef80 Mon Sep 17 00:00:00 2001 From: Wang Zhi Date: Wed, 17 Aug 2011 10:39:31 +0800 Subject: USB: EHCI: Do not rely on PORT_SUSPEND to stop USB resuming in ehci_bus_resume(). From EHCI Spec p.28 HC should clear PORT_SUSPEND when SW clears PORT_RESUME. In Intel Oaktrail platform, MPH (Multi-Port Host Controller) core clears PORT_SUSPEND directly when SW sets PORT_RESUME bit. If we rely on PORT_SUSPEND bit to stop USB resume, we will miss the action of clearing PORT_RESUME. This will cause unexpected long resume signal on USB bus. Signed-off-by: Wang Zhi Signed-off-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-hub.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c index e051b30..4c32cb1 100644 --- a/drivers/usb/host/ehci-hub.c +++ b/drivers/usb/host/ehci-hub.c @@ -343,7 +343,7 @@ static int ehci_bus_resume (struct usb_hcd *hcd) u32 temp; u32 power_okay; int i; - u8 resume_needed = 0; + unsigned long resume_needed = 0; if (time_before (jiffies, ehci->next_statechange)) msleep(5); @@ -416,7 +416,7 @@ static int ehci_bus_resume (struct usb_hcd *hcd) if (test_bit(i, &ehci->bus_suspended) && (temp & PORT_SUSPEND)) { temp |= PORT_RESUME; - resume_needed = 1; + set_bit(i, &resume_needed); } ehci_writel(ehci, temp, &ehci->regs->port_status [i]); } @@ -431,8 +431,7 @@ static int ehci_bus_resume (struct usb_hcd *hcd) i = HCS_N_PORTS (ehci->hcs_params); while (i--) { temp = ehci_readl(ehci, &ehci->regs->port_status [i]); - if (test_bit(i, &ehci->bus_suspended) && - (temp & PORT_SUSPEND)) { + if (test_bit(i, &resume_needed)) { temp &= ~(PORT_RWC_BITS | PORT_RESUME); ehci_writel(ehci, temp, &ehci->regs->port_status [i]); ehci_vdbg (ehci, "resumed port %d\n", i + 1); -- cgit v1.1 From e5d3d4463fb30998385f9e78ab3c7f63b5813000 Mon Sep 17 00:00:00 2001 From: Yulgon Kim Date: Thu, 18 Aug 2011 14:02:45 +0900 Subject: usb: s5p-ehci: fix a NULL pointer deference This patch fixes a NULL pointer deference. A NULL pointer dereference happens since s5p_ehci->hcd field is not initialized yet in probe function. [jg1.han@samsung.com: edit commit message] Signed-off-by: Yulgon Kim Signed-off-by: Jingoo Han Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-s5p.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/ehci-s5p.c b/drivers/usb/host/ehci-s5p.c index b3958b3..9e77f1c 100644 --- a/drivers/usb/host/ehci-s5p.c +++ b/drivers/usb/host/ehci-s5p.c @@ -86,6 +86,7 @@ static int __devinit s5p_ehci_probe(struct platform_device *pdev) goto fail_hcd; } + s5p_ehci->hcd = hcd; s5p_ehci->clk = clk_get(&pdev->dev, "usbhost"); if (IS_ERR(s5p_ehci->clk)) { -- cgit v1.1 From c6eb2d75ffcdfafa37ff010bf467de20d468ef79 Mon Sep 17 00:00:00 2001 From: "Gavin.zhu" Date: Mon, 22 Aug 2011 13:51:53 -0700 Subject: USB: option: add YUGA device id to driver Signed-off-by: Gavin.zhu Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 92 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index ab86e0d..78452ba 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -418,6 +418,56 @@ static void option_instat_callback(struct urb *urb); #define SAMSUNG_VENDOR_ID 0x04e8 #define SAMSUNG_PRODUCT_GT_B3730 0x6889 +/* YUGA products www.yuga-info.com*/ +#define YUGA_VENDOR_ID 0x257A +#define YUGA_PRODUCT_CEM600 0x1601 +#define YUGA_PRODUCT_CEM610 0x1602 +#define YUGA_PRODUCT_CEM500 0x1603 +#define YUGA_PRODUCT_CEM510 0x1604 +#define YUGA_PRODUCT_CEM800 0x1605 +#define YUGA_PRODUCT_CEM900 0x1606 + +#define YUGA_PRODUCT_CEU818 0x1607 +#define YUGA_PRODUCT_CEU816 0x1608 +#define YUGA_PRODUCT_CEU828 0x1609 +#define YUGA_PRODUCT_CEU826 0x160A +#define YUGA_PRODUCT_CEU518 0x160B +#define YUGA_PRODUCT_CEU516 0x160C +#define YUGA_PRODUCT_CEU528 0x160D +#define YUGA_PRODUCT_CEU526 0x160F + +#define YUGA_PRODUCT_CWM600 0x2601 +#define YUGA_PRODUCT_CWM610 0x2602 +#define YUGA_PRODUCT_CWM500 0x2603 +#define YUGA_PRODUCT_CWM510 0x2604 +#define YUGA_PRODUCT_CWM800 0x2605 +#define YUGA_PRODUCT_CWM900 0x2606 + +#define YUGA_PRODUCT_CWU718 0x2607 +#define YUGA_PRODUCT_CWU716 0x2608 +#define YUGA_PRODUCT_CWU728 0x2609 +#define YUGA_PRODUCT_CWU726 0x260A +#define YUGA_PRODUCT_CWU518 0x260B +#define YUGA_PRODUCT_CWU516 0x260C +#define YUGA_PRODUCT_CWU528 0x260D +#define YUGA_PRODUCT_CWU526 0x260F + +#define YUGA_PRODUCT_CLM600 0x2601 +#define YUGA_PRODUCT_CLM610 0x2602 +#define YUGA_PRODUCT_CLM500 0x2603 +#define YUGA_PRODUCT_CLM510 0x2604 +#define YUGA_PRODUCT_CLM800 0x2605 +#define YUGA_PRODUCT_CLM900 0x2606 + +#define YUGA_PRODUCT_CLU718 0x2607 +#define YUGA_PRODUCT_CLU716 0x2608 +#define YUGA_PRODUCT_CLU728 0x2609 +#define YUGA_PRODUCT_CLU726 0x260A +#define YUGA_PRODUCT_CLU518 0x260B +#define YUGA_PRODUCT_CLU516 0x260C +#define YUGA_PRODUCT_CLU528 0x260D +#define YUGA_PRODUCT_CLU526 0x260F + /* some devices interfaces need special handling due to a number of reasons */ enum option_blacklist_reason { OPTION_BLACKLIST_NONE = 0, @@ -1009,6 +1059,48 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */ { USB_DEVICE(ONDA_VENDOR_ID, ONDA_MT825UP) }, /* ONDA MT825UP modem */ { USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730 LTE USB modem.*/ + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM600) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM610) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM500) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM510) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM800) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM900) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU818) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU816) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU828) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU826) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU518) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU516) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU528) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU526) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM600) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM610) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM500) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM510) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM800) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM900) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU718) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU716) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU728) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU726) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU518) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU516) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU528) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU526) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM600) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM610) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM500) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM510) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM800) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM900) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU718) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU716) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU728) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU726) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU518) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU516) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU528) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU526) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); -- cgit v1.1 From 6118514e8749105334f46ccec6faf9a439be6cf9 Mon Sep 17 00:00:00 2001 From: Andrew Bird Date: Wed, 17 Aug 2011 00:20:03 +0100 Subject: USB option driver K3765/K4505 avoid CDC_DATA interface Currently the Option driver avoids binding interface 1 on Huawei K3765 and K4505 broadband modems as it should be handled by the cdc_ether driver instead. This patch ensures we don't bind the interface 2 on those devices as that is CDC_DATA. Signed-off-by: Andrew Bird Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 78452ba..fe22e90 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1235,7 +1235,8 @@ static int option_probe(struct usb_serial *serial, (serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K3765 || serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4505 || serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4605) && - serial->interface->cur_altsetting->desc.bInterfaceNumber == 1) + (serial->interface->cur_altsetting->desc.bInterfaceNumber == 1 || + serial->interface->cur_altsetting->desc.bInterfaceNumber == 2)) return -ENODEV; /* Don't bind network interface on Samsung GT-B3730, it is handled by a separate module */ -- cgit v1.1 From 858a914324c7786f483661e3a89bc8fbe50f1b9d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 16 Aug 2011 08:15:26 -0700 Subject: hwmon: (ntc_thermistor) Simplify if sequence Replace unnecessary if with else statement. This fixes the following (false) compile warning reported with some combinations of C compiler version and configuration. drivers/hwmon/ntc_thermistor.c: In function 'ntc_show_temp': drivers/hwmon/ntc_thermistor.c:225: warning: 'low' may be used uninitialized in this function drivers/hwmon/ntc_thermistor.c:225: note: 'low' was declared here drivers/hwmon/ntc_thermistor.c:225: warning: 'high' may be used uninitialized in this function drivers/hwmon/ntc_thermistor.c:225: note: 'high' was declared here drivers/hwmon/ntc_thermistor.c:294: warning: 'temp' may be used uninitialized in this function Signed-off-by: Guenter Roeck Acked-by: Jean Delvare --- drivers/hwmon/ntc_thermistor.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index d7926f4..eab1161 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -211,8 +211,7 @@ static int lookup_comp(struct ntc_data *data, if (data->comp[mid].ohm <= ohm) { *i_low = mid; *i_high = mid - 1; - } - if (data->comp[mid].ohm > ohm) { + } else { *i_low = mid + 1; *i_high = mid; } -- cgit v1.1 From 8ea95e08711f504d83281e762ca65a849a89593e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 2 Aug 2011 10:08:34 +0200 Subject: pti: add missing CONFIG_PCI dependency allmodconfig compile fails on s390 because of the new PTI driver: drivers/misc/pti.c:407:3: error: implicit declaration of function 'pci_iounmap' drivers/misc/pti.c:410:3: error: implicit declaration of function 'pci_release_region' Add a 'depends on PCI' statement so it doesn't get compiled. Cc: J Freyensee Signed-off-by: Tracey Dent Acked-by: Randy Dunlap Signed-off-by: Sergei Trofimovich Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- drivers/misc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 0a4d86c..2d6423c 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -146,6 +146,7 @@ config PHANTOM config INTEL_MID_PTI tristate "Parallel Trace Interface for MIPI P1149.7 cJTAG standard" + depends on PCI default n help The PTI (Parallel Trace Interface) driver directs -- cgit v1.1 From 86ec67fd0a28c7f2b765e33aaf5b002d28c5f1fa Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Fri, 12 Aug 2011 14:58:31 -0700 Subject: base/devres.c: quiet sparse noise about context imbalance devres_release_all and devres_release_group both aquire the lock &dev->devres_lock but the release of that lock is done in release_nodes. This results in sparse noise about context imbalance. Add a lock annotation to release_nodes to quiet this noise. Signed-off-by: H Hartley Sweeten Signed-off-by: Greg Kroah-Hartman --- drivers/base/devres.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/base/devres.c b/drivers/base/devres.c index cf7a0c7..65cd748 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -397,6 +397,7 @@ static int remove_nodes(struct device *dev, static int release_nodes(struct device *dev, struct list_head *first, struct list_head *end, unsigned long flags) + __releases(&dev->devres_lock) { LIST_HEAD(todo); int cnt; -- cgit v1.1 From 5926cef26c72cd121266b000b8975e6373cbf2b3 Mon Sep 17 00:00:00 2001 From: Pavan Savoy Date: Wed, 10 Aug 2011 10:18:30 -0500 Subject: drivers:misc: ti-st: avoid a misleading dbg msg Previously the private data of each protocol registered to use ST was used to determine whether the protocol was registered to use shared transport or otherwise. However, now a flag is_registered is maintained to identify whether a protocol intends to use ST. Upon closing of the UART the error message relevant to this lack of un-registration was misleading and this patch fixes that. Signed-off-by: Pavan Savoy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index 54c91ff..c8e335d 100644 --- a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -717,7 +717,7 @@ static void st_tty_close(struct tty_struct *tty) */ spin_lock_irqsave(&st_gdata->lock, flags); for (i = ST_BT; i < ST_MAX_CHANNELS; i++) { - if (st_gdata->list[i] != NULL) + if (st_gdata->is_registered[i] == true) pr_err("%d not un-registered", i); st_gdata->list[i] = NULL; } -- cgit v1.1 From 0d7c5f2572ccfa7bf83292b1506926663f2d164a Mon Sep 17 00:00:00 2001 From: Pavan Savoy Date: Wed, 10 Aug 2011 10:18:31 -0500 Subject: drivers:misc:ti-st: platform hooks for chip states Certain platform specific or Host-WiLink Interface specific actions would be required to be taken when the chip is being enabled and after the chip is disabled such as configuration of the mux modes for the GPIO of host connected to the nshutdown of the chip or relinquishing UART after the chip is disabled. Similar actions can also be taken when the chip is in deep sleep or when the chip is awake. Performance enhancements such as configuring the host to run faster when chip is awake and slower when chip is asleep can also be made here. Signed-off-by: Pavan Savoy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_kim.c | 12 ++++++++++++ drivers/misc/ti-st/st_ll.c | 19 +++++++++++++++++++ include/linux/ti_wilink_st.h | 27 ++++++++++++++++++++++++++- 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c index 38fd2f0..6884dd1 100644 --- a/drivers/misc/ti-st/st_kim.c +++ b/drivers/misc/ti-st/st_kim.c @@ -434,11 +434,17 @@ long st_kim_start(void *kim_data) { long err = 0; long retry = POR_RETRY_COUNT; + struct ti_st_plat_data *pdata; struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data; pr_info(" %s", __func__); + pdata = kim_gdata->kim_pdev->dev.platform_data; do { + /* platform specific enabling code here */ + if (pdata->chip_enable) + pdata->chip_enable(kim_gdata); + /* Configure BT nShutdown to HIGH state */ gpio_set_value(kim_gdata->nshutdown, GPIO_LOW); mdelay(5); /* FIXME: a proper toggle */ @@ -489,6 +495,8 @@ long st_kim_stop(void *kim_data) { long err = 0; struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data; + struct ti_st_plat_data *pdata = + kim_gdata->kim_pdev->dev.platform_data; INIT_COMPLETION(kim_gdata->ldisc_installed); @@ -515,6 +523,10 @@ long st_kim_stop(void *kim_data) gpio_set_value(kim_gdata->nshutdown, GPIO_HIGH); mdelay(1); gpio_set_value(kim_gdata->nshutdown, GPIO_LOW); + + /* platform specific disable */ + if (pdata->chip_disable) + pdata->chip_disable(kim_gdata); return err; } diff --git a/drivers/misc/ti-st/st_ll.c b/drivers/misc/ti-st/st_ll.c index 3f24951..1ff460a 100644 --- a/drivers/misc/ti-st/st_ll.c +++ b/drivers/misc/ti-st/st_ll.c @@ -22,6 +22,7 @@ #define pr_fmt(fmt) "(stll) :" fmt #include #include +#include #include /**********************************************************************/ @@ -37,6 +38,9 @@ static void send_ll_cmd(struct st_data_s *st_data, static void ll_device_want_to_sleep(struct st_data_s *st_data) { + struct kim_data_s *kim_data; + struct ti_st_plat_data *pdata; + pr_debug("%s", __func__); /* sanity check */ if (st_data->ll_state != ST_LL_AWAKE) @@ -46,10 +50,19 @@ static void ll_device_want_to_sleep(struct st_data_s *st_data) send_ll_cmd(st_data, LL_SLEEP_ACK); /* update state */ st_data->ll_state = ST_LL_ASLEEP; + + /* communicate to platform about chip asleep */ + kim_data = st_data->kim_data; + pdata = kim_data->kim_pdev->dev.platform_data; + if (pdata->chip_asleep) + pdata->chip_asleep(NULL); } static void ll_device_want_to_wakeup(struct st_data_s *st_data) { + struct kim_data_s *kim_data; + struct ti_st_plat_data *pdata; + /* diff actions in diff states */ switch (st_data->ll_state) { case ST_LL_ASLEEP: @@ -70,6 +83,12 @@ static void ll_device_want_to_wakeup(struct st_data_s *st_data) } /* update state */ st_data->ll_state = ST_LL_AWAKE; + + /* communicate to platform about chip wakeup */ + kim_data = st_data->kim_data; + pdata = kim_data->kim_pdev->dev.platform_data; + if (pdata->chip_asleep) + pdata->chip_awake(NULL); } /**********************************************************************/ diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h index b004e55..2ef4385 100644 --- a/include/linux/ti_wilink_st.h +++ b/include/linux/ti_wilink_st.h @@ -410,7 +410,28 @@ struct gps_event_hdr { u16 plen; } __attribute__ ((packed)); -/* platform data */ +/** + * struct ti_st_plat_data - platform data shared between ST driver and + * platform specific board file which adds the ST device. + * @nshutdown_gpio: Host's GPIO line to which chip's BT_EN is connected. + * @dev_name: The UART/TTY name to which chip is interfaced. (eg: /dev/ttyS1) + * @flow_cntrl: Should always be 1, since UART's CTS/RTS is used for PM + * purposes. + * @baud_rate: The baud rate supported by the Host UART controller, this will + * be shared across with the chip via a HCI VS command from User-Space Init + * Mgr application. + * @suspend: + * @resume: legacy PM routines hooked to platform specific board file, so as + * to take chip-host interface specific action. + * @chip_enable: + * @chip_disable: Platform/Interface specific mux mode setting, GPIO + * configuring, Host side PM disabling etc.. can be done here. + * @chip_asleep: + * @chip_awake: Chip specific deep sleep states is communicated to Host + * specific board-xx.c to take actions such as cut UART clocks when chip + * asleep or run host faster when chip awake etc.. + * + */ struct ti_st_plat_data { long nshutdown_gpio; unsigned char dev_name[UART_DEV_NAME_LEN]; /* uart name */ @@ -418,6 +439,10 @@ struct ti_st_plat_data { unsigned long baud_rate; int (*suspend)(struct platform_device *, pm_message_t); int (*resume)(struct platform_device *); + int (*chip_enable) (struct kim_data_s *); + int (*chip_disable) (struct kim_data_s *); + int (*chip_asleep) (struct kim_data_s *); + int (*chip_awake) (struct kim_data_s *); }; #endif /* TI_WILINK_ST_H */ -- cgit v1.1 From 74a4fcf19eed6550651f455db5741fd216b4f004 Mon Sep 17 00:00:00 2001 From: Pavan Savoy Date: Wed, 10 Aug 2011 10:18:32 -0500 Subject: drivers:misc: ti-st: reinit completion on ver read After the version information has been read, the completion which assists in wait_for_completion during the firmware send/wait sequence is being re-used and hence this needs to be re-initialised for fool proof firmware download retries. Signed-off-by: Pavan Savoy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_kim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c index 6884dd1..e5639ca 100644 --- a/drivers/misc/ti-st/st_kim.c +++ b/drivers/misc/ti-st/st_kim.c @@ -210,6 +210,7 @@ static long read_local_version(struct kim_data_s *kim_gdata, char *bts_scr_name) pr_err(" waiting for ver info- timed out "); return -ETIMEDOUT; } + INIT_COMPLETION(kim_gdata->kim_rcvd); version = MAKEWORD(kim_gdata->resp_buffer[13], -- cgit v1.1 From 78bb9697e2c4b62c426f1a2571c293a2e4463adf Mon Sep 17 00:00:00 2001 From: Vijay Badawadagi Date: Wed, 10 Aug 2011 10:18:33 -0500 Subject: drivers:misc: ti-st: fail-safe on wrong pkt type Texas Instrument's shared transport driver interpret incoming data from the UART based on the various protocol drivers registered to the driver such as btwilink driver or FM or GPS driver which provide logical channel IDs. In case of bad-behavior from chip such as HCI Event response for a GPS command or a HCI Event (h/w error event) for a FM response & In case of bad-behavior from UART driver such as dropping data bytes a fail-safe is required to avoid kernel panic. Signed-off-by: Pavan Savoy Signed-off-by: Vijay Badawadagi Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index c8e335d..1f973ce 100644 --- a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -338,6 +338,12 @@ void st_int_recv(void *disc_data, /* Unknow packet? */ default: type = *ptr; + if (st_gdata->list[type] == NULL) { + pr_err("chip/interface misbehavior dropping" + " frame starting with 0x%02x", type); + goto done; + + } st_gdata->rx_skb = alloc_skb( st_gdata->list[type]->max_frame_size, GFP_ATOMIC); @@ -354,6 +360,7 @@ void st_int_recv(void *disc_data, ptr++; count--; } +done: spin_unlock_irqrestore(&st_gdata->lock, flags); pr_debug("done %s", __func__); return; -- cgit v1.1 From 2f81a02ce0693863019dc3fcc532533af6dc0dcd Mon Sep 17 00:00:00 2001 From: Pavan Savoy Date: Wed, 10 Aug 2011 10:18:34 -0500 Subject: drivers:misc: ti-st: reinit completion before send download firmware behaves differently at different times, when logs are enabled and the system is loaded, the wait_for_completion is able to wait for every send, However during other times the wait does not happen. So, for reliability reinitializing the completion before every send, makes sure the wait happens for every send. Signed-off-by: Pavan Savoy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_kim.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c index e5639ca..1748a93 100644 --- a/drivers/misc/ti-st/st_kim.c +++ b/drivers/misc/ti-st/st_kim.c @@ -299,6 +299,7 @@ static long download_firmware(struct kim_data_s *kim_gdata) switch (((struct bts_action *)ptr)->type) { case ACTION_SEND_COMMAND: /* action send */ + pr_debug("S"); action_ptr = &(((struct bts_action *)ptr)->data[0]); if (unlikely (((struct hci_command *)action_ptr)->opcode == @@ -336,6 +337,10 @@ static long download_firmware(struct kim_data_s *kim_gdata) release_firmware(kim_gdata->fw_entry); return -ETIMEDOUT; } + /* reinit completion before sending for the + * relevant wait + */ + INIT_COMPLETION(kim_gdata->kim_rcvd); /* * Free space found in uart buffer, call st_int_write @@ -362,6 +367,7 @@ static long download_firmware(struct kim_data_s *kim_gdata) } break; case ACTION_WAIT_EVENT: /* wait */ + pr_debug("W"); if (!wait_for_completion_timeout (&kim_gdata->kim_rcvd, msecs_to_jiffies(CMD_RESP_TIME))) { -- cgit v1.1 From d0344ef670d686628f369e649c86f71c90ebe222 Mon Sep 17 00:00:00 2001 From: Pavan Savoy Date: Wed, 10 Aug 2011 10:18:35 -0500 Subject: drivers:misc: ti-st: wait for completion at fail When the line discipline install fails for reasons such as missing user-space UIM or broken communication between UIM and ST driver, then the ST attempts/retries to request for ldisc installation again. Signed-off-by: Pavan Savoy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_kim.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c index 1748a93..d8ca406 100644 --- a/drivers/misc/ti-st/st_kim.c +++ b/drivers/misc/ti-st/st_kim.c @@ -473,6 +473,12 @@ long st_kim_start(void *kim_data) pr_info("ldisc_install = 0"); sysfs_notify(&kim_gdata->kim_pdev->dev.kobj, NULL, "install"); + /* the following wait is never going to be completed, + * since the ldisc was never installed, hence serving + * as a mdelay of LDISC_TIME msecs */ + err = wait_for_completion_timeout + (&kim_gdata->ldisc_installed, + msecs_to_jiffies(LDISC_TIME)); err = -ETIMEDOUT; continue; } else { @@ -485,6 +491,13 @@ long st_kim_start(void *kim_data) pr_info("ldisc_install = 0"); sysfs_notify(&kim_gdata->kim_pdev->dev.kobj, NULL, "install"); + /* this wait might be completed, though in the + * tty_close() since the ldisc is already + * installed */ + err = wait_for_completion_timeout + (&kim_gdata->ldisc_installed, + msecs_to_jiffies(LDISC_TIME)); + err = -EINVAL; continue; } else { /* on success don't retry */ break; -- cgit v1.1 From 76ff0e64d42fac59fb756536342a3d3f3e4e8833 Mon Sep 17 00:00:00 2001 From: Pavan Savoy Date: Wed, 10 Aug 2011 10:18:36 -0500 Subject: drivers:misc: ti-st: free skb on firmware download If during validation of the firmware download the data doesn't match what is expected out of the chip, this calls for a firmware download failure and a retry. Free the SKB which collects response during such scenarios. Signed-off-by: Pavan Savoy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_kim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c index d8ca406..3a35805 100644 --- a/drivers/misc/ti-st/st_kim.c +++ b/drivers/misc/ti-st/st_kim.c @@ -68,6 +68,7 @@ void validate_firmware_response(struct kim_data_s *kim_gdata) if (unlikely(skb->data[5] != 0)) { pr_err("no proper response during fw download"); pr_err("data6 %x", skb->data[5]); + kfree_skb(skb); return; /* keep waiting for the proper response */ } /* becos of all the script being downloaded */ -- cgit v1.1 From 651d62a8b0378b911f083a1712d9d228894f46d8 Mon Sep 17 00:00:00 2001 From: Pavan Savoy Date: Wed, 10 Aug 2011 10:18:37 -0500 Subject: drivers:misc: ti-st: fix unexpected UART close If suppose the UIM were to die and hence UART were to close when the Bluetooth/FM or GPS is turned on, prep the ST for a state where-in if the UIM comes back up, Bluetooth/FM/GPS can be turned on. Signed-off-by: Pavan Savoy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index 1f973ce..ba168a7 100644 --- a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -727,6 +727,7 @@ static void st_tty_close(struct tty_struct *tty) if (st_gdata->is_registered[i] == true) pr_err("%d not un-registered", i); st_gdata->list[i] = NULL; + st_gdata->is_registered[i] = false; } st_gdata->protos_registered = 0; spin_unlock_irqrestore(&st_gdata->lock, flags); -- cgit v1.1 From 6c4b47d243112e98811ce0da7bbb32cc3857dd1a Mon Sep 17 00:00:00 2001 From: Tomoya MORINAGA Date: Wed, 20 Jul 2011 20:17:49 +0900 Subject: pch_uart: Set PCIe bus number using probe parameter Currently, PCIe bus number is set as fixed value "2". However, PCIe bus number is not always "2". This patch sets bus number using probe() parameter. Signed-off-by: Tomoya MORINAGA Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/pch_uart.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c index 846dfcd..b46218d 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -598,7 +598,8 @@ static void pch_request_dma(struct uart_port *port) dma_cap_zero(mask); dma_cap_set(DMA_SLAVE, mask); - dma_dev = pci_get_bus_and_slot(2, PCI_DEVFN(0xa, 0)); /* Get DMA's dev + dma_dev = pci_get_bus_and_slot(priv->pdev->bus->number, + PCI_DEVFN(0xa, 0)); /* Get DMA's dev information */ /* Set Tx DMA */ param = &priv->param_tx; -- cgit v1.1 From 181d5762bd8eaa2881b7df27bad260bf4abda1bc Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 4 Aug 2011 03:13:10 -0500 Subject: drivers/serial/ucc_uart.c: Fix compiler warning drivers/tty/serial/ucc_uart.c: In function 'qe2cpu_addr': drivers/tty/serial/ucc_uart.c:238:2: warning: format '%x' expects type 'unsigned int', but argument 3 has type 'dma_addr_t' Signed-off-by: Kumar Gala Acked-by: Timur Tabi Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/ucc_uart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/ucc_uart.c b/drivers/tty/serial/ucc_uart.c index c327218..9af9f08 100644 --- a/drivers/tty/serial/ucc_uart.c +++ b/drivers/tty/serial/ucc_uart.c @@ -235,7 +235,7 @@ static inline void *qe2cpu_addr(dma_addr_t addr, struct uart_qe_port *qe_port) return qe_port->bd_virt + (addr - qe_port->bd_dma_addr); /* something nasty happened */ - printk(KERN_ERR "%s: addr=%x\n", __func__, addr); + printk(KERN_ERR "%s: addr=%llx\n", __func__, (u64)addr); BUG(); return NULL; } -- cgit v1.1 From ab8ba3a2d2cba6a658ef596cd5b2e0905b6c8a9f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 16 Aug 2011 12:02:28 -0600 Subject: serial: 8250_pnp: add Intermec CV60 touchscreen device It would have been nice if Intermec had supplied a PNP0501 _CID for the COM3 device, but they didn't, so we have to recognize it explicitly. Reference: https://bugzilla.kernel.org/show_bug.cgi?id=40612 CC: Jeff Chua Signed-off-by: Bjorn Helgaas Cc: stable Acked-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250_pnp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/serial/8250_pnp.c b/drivers/tty/serial/8250_pnp.c index fc301f6..a2f2365 100644 --- a/drivers/tty/serial/8250_pnp.c +++ b/drivers/tty/serial/8250_pnp.c @@ -109,6 +109,9 @@ static const struct pnp_device_id pnp_dev_table[] = { /* IBM */ /* IBM Thinkpad 701 Internal Modem Voice */ { "IBM0033", 0 }, + /* Intermec */ + /* Intermec CV60 touchscreen port */ + { "PNP4972", 0 }, /* Intertex */ /* Intertex 28k8 33k6 Voice EXT PnP */ { "IXDC801", 0 }, -- cgit v1.1 From 0d0a3cc183c50956fe1d9e37cca520debea93ad5 Mon Sep 17 00:00:00 2001 From: "Voss, Nikolaus" Date: Wed, 10 Aug 2011 14:02:29 +0200 Subject: atmel_serial: fix atmel_default_console_device reflect new static uart platform ids introduced by patch http://article.gmane.org/gmane.linux.kernel/1126105 Signed-off-by: Nikolaus Voss Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index af9b781..b922f5d 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -1609,9 +1609,11 @@ static struct console atmel_console = { static int __init atmel_console_init(void) { if (atmel_default_console_device) { - add_preferred_console(ATMEL_DEVICENAME, - atmel_default_console_device->id, NULL); - atmel_init_port(&atmel_ports[atmel_default_console_device->id], + struct atmel_uart_data *pdata = + atmel_default_console_device->dev.platform_data; + + add_preferred_console(ATMEL_DEVICENAME, pdata->num, NULL); + atmel_init_port(&atmel_ports[pdata->num], atmel_default_console_device); register_console(&atmel_console); } -- cgit v1.1 From b6bede3b4cdfbd188557ab50fceec2e91d295edf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 14 Aug 2011 17:13:00 +0000 Subject: xfs: fix tracing builds inside the source tree The code really requires the current source directory to be in the header search path. We already do this if building with an object tree separate from the source, but it needs to be added manually if building inside the source. The cflags addition for it accidentally got removed when collapsing the xfs directory structure. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index ffce328..427a4e8 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -16,6 +16,8 @@ # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # +ccflags-y += -I$(src) # needed for trace events + ccflags-$(CONFIG_XFS_DEBUG) += -g obj-$(CONFIG_XFS_FS) += xfs.o -- cgit v1.1 From a2cc797d2d1a116b607de353de0ae1c2cab80b74 Mon Sep 17 00:00:00 2001 From: Kamal Mostafa Date: Mon, 22 Aug 2011 12:39:10 -0700 Subject: i915: do not setup intel_backlight twice The commit "Not all systems expose a firmware or platform mechanism for changing the backlight intensity on i915, so add native driver support" adds calls to intel_panel_setup_backlight() from intel_{lvds,dp}_init so do not call it again from intel_setup_outputs(). BugLink: http://bugs.launchpad.net/bugs/831542 Signed-off-by: Kamal Mostafa ACKed-by: Matthew Garrett Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_display.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ee1d701..5a1ae9f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7238,8 +7238,6 @@ static void intel_setup_outputs(struct drm_device *dev) intel_encoder_clones(dev, encoder->clone_mask); } - intel_panel_setup_backlight(dev); - /* disable all the possible outputs/crtcs before entering KMS mode */ drm_helper_disable_unused_functions(dev); } -- cgit v1.1 From e21757a05730f03f18fbfc528a919e0205aa6a61 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Mon, 22 Aug 2011 16:13:00 -0600 Subject: OMAP3: clock: indicate that gpt12_fck and wdt1_fck are in the WKUP clockdomain The oscillator that supplies GPT12_FCLK and WDT1_FCLK exists in the WKUP powerdomain[1]. This resolves at least one boot-time warning: omap_hwmod: gpt12_fck: missing clockdomain for gpt12_fck. 1. _OMAP34xx Multimedia High Security (HS) Device Silicon Revision 3.1.x Security Addendum Version K (SWPU119K)_ Figure 3-29. August 2010. --- arch/arm/mach-omap2/clock3xxx_data.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-omap2/clock3xxx_data.c b/arch/arm/mach-omap2/clock3xxx_data.c index ffd55b1..b9b8446 100644 --- a/arch/arm/mach-omap2/clock3xxx_data.c +++ b/arch/arm/mach-omap2/clock3xxx_data.c @@ -3078,6 +3078,7 @@ static struct clk gpt12_fck = { .name = "gpt12_fck", .ops = &clkops_null, .parent = &secure_32k_fck, + .clkdm_name = "wkup_clkdm", .recalc = &followparent_recalc, }; @@ -3085,6 +3086,7 @@ static struct clk wdt1_fck = { .name = "wdt1_fck", .ops = &clkops_null, .parent = &secure_32k_fck, + .clkdm_name = "wkup_clkdm", .recalc = &followparent_recalc, }; -- cgit v1.1 From 81a081fff7f3c144a0da9ee726906e533f66dd89 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Mon, 22 Aug 2011 09:22:41 -0500 Subject: sound/soc/fsl/fsl_dma.c: add missing of_node_put of_parse_phandle increments the reference count of np, so this should be decremented before trying the next possibility. Since we don't actually use np, we can decrement the reference count immediately. Reported-by: Julia Lawall Signed-off-by: Timur Tabi Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_dma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/fsl/fsl_dma.c b/sound/soc/fsl/fsl_dma.c index 732208c..cb50598 100644 --- a/sound/soc/fsl/fsl_dma.c +++ b/sound/soc/fsl/fsl_dma.c @@ -879,10 +879,12 @@ static struct device_node *find_ssi_node(struct device_node *dma_channel_np) * assume that device_node pointers are a valid comparison. */ np = of_parse_phandle(ssi_np, "fsl,playback-dma", 0); + of_node_put(np); if (np == dma_channel_np) return ssi_np; np = of_parse_phandle(ssi_np, "fsl,capture-dma", 0); + of_node_put(np); if (np == dma_channel_np) return ssi_np; } -- cgit v1.1 From 57cf9d4512c86a4a1b58857ca22b18bffbfd6812 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sat, 20 Aug 2011 11:03:44 +0800 Subject: ASoC: soc-core: use GFP_KERNEL flag for kmalloc in snd_soc_cnew GFP_ATOMIC is not needed here, use GFP_KERNEL instead. Signed-off-by: Axel Lin Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 83ad8ca..b085d8e 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1913,7 +1913,7 @@ struct snd_kcontrol *snd_soc_cnew(const struct snd_kcontrol_new *_template, if (prefix) { name_len = strlen(long_name) + strlen(prefix) + 2; - name = kmalloc(name_len, GFP_ATOMIC); + name = kmalloc(name_len, GFP_KERNEL); if (!name) return NULL; -- cgit v1.1 From 96101bd0bf7974686f6875c065a7a9a83cd2107a Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 20 Aug 2011 08:12:38 +0200 Subject: sound/soc/kirkwood/kirkwood-i2s.c: add missing kfree Adjust the goto to jump to the error handling code that includes kfree. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier x; expression E1!=0,E2,E3,E4; statement S; iterator I; @@ ( if (...) { ... when != kfree(x) when != x = E3 when != E3 = x * return ...; } ... when != x = E2 when != I(...,x,...) S if (...) { ... when != x = E4 kfree(x); ... return ...; } ) // Signed-off-by: Julia Lawall Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/kirkwood/kirkwood-i2s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/kirkwood/kirkwood-i2s.c b/sound/soc/kirkwood/kirkwood-i2s.c index a33fc51..8f16cd3 100644 --- a/sound/soc/kirkwood/kirkwood-i2s.c +++ b/sound/soc/kirkwood/kirkwood-i2s.c @@ -424,7 +424,7 @@ static __devinit int kirkwood_i2s_dev_probe(struct platform_device *pdev) if (!priv->mem) { dev_err(&pdev->dev, "request_mem_region failed\n"); err = -EBUSY; - goto error; + goto error_alloc; } priv->io = ioremap(priv->mem->start, SZ_16K); -- cgit v1.1 From 5006b313283c56cfda498513b7091692bcd74433 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 20 Aug 2011 08:12:39 +0200 Subject: sound/soc/ep93xx/ep93xx-i2s.c: add missing kfree Introduce a new label that includes kfree and jump to that one. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier x; expression E1!=0,E2,E3,E4; statement S; iterator I; @@ ( if (...) { ... when != kfree(x) when != x = E3 when != E3 = x * return ...; } ... when != x = E2 when != I(...,x,...) S if (...) { ... when != x = E4 kfree(x); ... return ...; } ) // Signed-off-by: Julia Lawall Acked-by: Alexander Sverdlin Reviewed-by: H Hartley Sweeten Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/ep93xx/ep93xx-i2s.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/ep93xx/ep93xx-i2s.c b/sound/soc/ep93xx/ep93xx-i2s.c index 56efa0c..099614e 100644 --- a/sound/soc/ep93xx/ep93xx-i2s.c +++ b/sound/soc/ep93xx/ep93xx-i2s.c @@ -385,14 +385,14 @@ static int ep93xx_i2s_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { err = -ENODEV; - goto fail; + goto fail_free_info; } info->mem = request_mem_region(res->start, resource_size(res), pdev->name); if (!info->mem) { err = -EBUSY; - goto fail; + goto fail_free_info; } info->regs = ioremap(info->mem->start, resource_size(info->mem)); @@ -435,6 +435,7 @@ fail_unmap_mem: iounmap(info->regs); fail_release_mem: release_mem_region(info->mem->start, resource_size(info->mem)); +fail_free_info: kfree(info); fail: return err; -- cgit v1.1 From 178b279b645a14ca8ea01e4ea818c88681a31b07 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 20 Aug 2011 09:02:00 +0200 Subject: sound/soc/fsl/p1022_ds.c: add missing of_node_put dma_channel_np has been accessed at this point, so decrease its reference count before leaving the function. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier x; expression E1!=0,E2,E3,E4; statement S; iterator I; @@ ( if (...) { ... when != of_node_put(x) when != x = E3 when != E3 = x * return ...; } ... when != x = E2 when != I(...,x,...) S if (...) { ... when != x = E4 of_node_put(x); ... return ...; } ) // Signed-off-by: Julia Lawall Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/fsl/p1022_ds.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/fsl/p1022_ds.c b/sound/soc/fsl/p1022_ds.c index 8fa4d5f..fcb862e 100644 --- a/sound/soc/fsl/p1022_ds.c +++ b/sound/soc/fsl/p1022_ds.c @@ -297,8 +297,10 @@ static int get_dma_channel(struct device_node *ssi_np, * dai->platform name should already point to an allocated buffer. */ ret = of_address_to_resource(dma_channel_np, 0, &res); - if (ret) + if (ret) { + of_node_put(dma_channel_np); return ret; + } snprintf((char *)dai->platform_name, DAI_NAME_SIZE, "%llx.%s", (unsigned long long) res.start, dma_channel_np->name); -- cgit v1.1 From c09f5ca7bdc9a82c5f721bc28c46d65452240cfa Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 20 Aug 2011 09:02:01 +0200 Subject: sound/soc/fsl/mpc8610_hpcd.c: add missing of_node_put The first change is to add an of_node_put, since codec_np has previously been allocated. The rest of the patch reorganizes the error handling code so the only code executed is that which is needed. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier x; expression E1!=0,E2,E3,E4; statement S; iterator I; @@ ( if (...) { ... when != of_node_put(x) when != x = E3 when != E3 = x * return ...; } ... when != x = E2 when != I(...,x,...) S if (...) { ... when != x = E4 of_node_put(x); ... return ...; } ) // Signed-off-by: Julia Lawall Acked-by: Timur Tabi Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/fsl/mpc8610_hpcd.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sound/soc/fsl/mpc8610_hpcd.c b/sound/soc/fsl/mpc8610_hpcd.c index a192979..358f0ba 100644 --- a/sound/soc/fsl/mpc8610_hpcd.c +++ b/sound/soc/fsl/mpc8610_hpcd.c @@ -345,8 +345,10 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev) } machine_data = kzalloc(sizeof(struct mpc8610_hpcd_data), GFP_KERNEL); - if (!machine_data) - return -ENOMEM; + if (!machine_data) { + ret = -ENOMEM; + goto error_alloc; + } machine_data->dai[0].cpu_dai_name = dev_name(&ssi_pdev->dev); machine_data->dai[0].ops = &mpc8610_hpcd_ops; @@ -494,7 +496,7 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev) ret = platform_device_add(sound_device); if (ret) { dev_err(&pdev->dev, "platform device add failed\n"); - goto error; + goto error_sound; } dev_set_drvdata(&pdev->dev, sound_device); @@ -502,14 +504,12 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev) return 0; +error_sound: + platform_device_unregister(sound_device); error: - of_node_put(codec_np); - - if (sound_device) - platform_device_unregister(sound_device); - kfree(machine_data); - +error_alloc: + of_node_put(codec_np); return ret; } -- cgit v1.1 From 0278ccd9d53e07c4e699432b2fed9de6c56f506c Mon Sep 17 00:00:00 2001 From: Chris Boot Date: Mon, 22 Aug 2011 21:38:38 +0100 Subject: firewire: sbp2: fix panic after rmmod with slow targets If firewire-sbp2 starts a login to a target that doesn't complete ORBs in a timely manner (and has to retry the login), and the module is removed before the operation times out, you end up with a null-pointer dereference and a kernel panic. [SR: This happens because sbp2_target_get/put() do not maintain module references. scsi_device_get/put() do, but at occasions like Chris describes one, nobody holds a reference to an SBP-2 sdev.] This patch cancels pending work for each unit in sbp2_remove(), which hopefully means there are no extra references around that prevent us from unloading. This fixes my crash. Signed-off-by: Chris Boot Signed-off-by: Stefan Richter --- drivers/firewire/sbp2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 41841a3..17cef86 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -1198,6 +1198,10 @@ static int sbp2_remove(struct device *dev) { struct fw_unit *unit = fw_unit(dev); struct sbp2_target *tgt = dev_get_drvdata(&unit->device); + struct sbp2_logical_unit *lu; + + list_for_each_entry(lu, &tgt->lu_list, link) + cancel_delayed_work_sync(&lu->work); sbp2_target_put(tgt); return 0; -- cgit v1.1 From 11f3a6bdc2528d1ce2af50202dbf7138fdee1b34 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Aug 2011 06:05:59 +0000 Subject: bridge: fix a possible net_device leak Jan Beulich reported a possible net_device leak in bridge code after commit bb900b27a2f4 (bridge: allow creating bridge devices with netlink) Reported-by: Jan Beulich Signed-off-by: Eric Dumazet Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_if.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 2cdf007..e738154 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -231,6 +231,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, int br_add_bridge(struct net *net, const char *name) { struct net_device *dev; + int res; dev = alloc_netdev(sizeof(struct net_bridge), name, br_dev_setup); @@ -240,7 +241,10 @@ int br_add_bridge(struct net *net, const char *name) dev_net_set(dev, net); - return register_netdev(dev); + res = register_netdev(dev); + if (res) + free_netdev(dev); + return res; } int br_del_bridge(struct net *net, const char *name) -- cgit v1.1 From f5e4282586dc0c9dab8c7d32e6c43aa07f68586b Mon Sep 17 00:00:00 2001 From: Jeremiah Matthey Date: Tue, 23 Aug 2011 09:44:30 +0200 Subject: HID: usbhid: Add support for SiGma Micro chip Patch to add SiGma Micro-based keyboards (1c4f:0002) to hid-quirks. These keyboards dont seem to allow the records to be initialized, and hence a timeout occurs when the usbhid driver attempts to initialize them. The patch just adds the signature for these keyboards to the hid-quirks list with the setting HID_QUIRK_NO_INIT_REPORTS. This removes the 5-10 second wait for the timeout to occur. Signed-off-by: Jeremiah Matthey Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 61c8809..7d27d2b 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -579,6 +579,9 @@ #define USB_DEVICE_ID_SAMSUNG_IR_REMOTE 0x0001 #define USB_DEVICE_ID_SAMSUNG_WIRELESS_KBD_MOUSE 0x0600 +#define USB_VENDOR_ID_SIGMA_MICRO 0x1c4f +#define USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD 0x0002 + #define USB_VENDOR_ID_SKYCABLE 0x1223 #define USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER 0x3F07 diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 621959d..4bdb5d4 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -89,6 +89,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_MULTI_TOUCH, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS, HID_QUIRK_MULTI_INPUT }, + { USB_VENDOR_ID_SIGMA_MICRO, USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD, HID_QUIRK_NO_INIT_REPORTS }, { 0, 0 } }; -- cgit v1.1 From 7c4c3960dff109bc5db4c35da481c212dadb5eb5 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 22 Aug 2011 21:17:57 +0000 Subject: drm/ttm: fix ttm_bo_add_ttm(user) failure path ttm_tt_destroy kfrees passed object, so we need to nullify a reference to it. Signed-off-by: Marcin Slusarz Cc: stable@kernel.org Reviewed-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 56619f6..384116a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -353,8 +353,10 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc) ret = ttm_tt_set_user(bo->ttm, current, bo->buffer_start, bo->num_pages); - if (unlikely(ret != 0)) + if (unlikely(ret != 0)) { ttm_tt_destroy(bo->ttm); + bo->ttm = NULL; + } break; default: printk(KERN_ERR TTM_PFX "Illegal buffer object type\n"); -- cgit v1.1 From eac2095398668f989a3dd8d00be1b87850d78c01 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 22 Aug 2011 03:15:04 +0000 Subject: drm/ttm: unbind ttm before destroying node in accel move cleanup Nouveau makes the assumption that if a TTM is bound there will be a mm_node around for it and the backwards ordering here resulted in a use-after-free on some eviction paths. Signed-off-by: Ben Skeggs Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 77dbf40..ae3c6f5 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -635,13 +635,13 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, if (ret) return ret; - ttm_bo_free_old_node(bo); if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) && (bo->ttm != NULL)) { ttm_tt_unbind(bo->ttm); ttm_tt_destroy(bo->ttm); bo->ttm = NULL; } + ttm_bo_free_old_node(bo); } else { /** * This should help pipeline ordinary buffer moves. -- cgit v1.1 From 8d3bb23609d4ae22803a15d232289fc09a7b61c4 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 22 Aug 2011 03:15:05 +0000 Subject: drm/ttm: ensure ttm for new node is bound before calling move_notify() This was true for new TTM_PL_SYSTEM and new TTM_PL_TT cases, but wasn't the case on TTM_PL_SYSTEM<->TTM_PL_TT moves, which causes trouble on some paths as nouveau's move_notify() hook requires that the dma addresses be valid at this point. Signed-off-by: Ben Skeggs Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 384116a..a4d38d8 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -392,10 +392,12 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, * Create and bind a ttm if required. */ - if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED) && (bo->ttm == NULL)) { - ret = ttm_bo_add_ttm(bo, false); - if (ret) - goto out_err; + if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) { + if (bo->ttm == NULL) { + ret = ttm_bo_add_ttm(bo, false); + if (ret) + goto out_err; + } ret = ttm_tt_set_placement_caching(bo->ttm, mem->placement); if (ret) -- cgit v1.1 From 3989ef6cfb80825af2f7933415797f052817ac3e Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Wed, 17 Aug 2011 11:43:20 +0200 Subject: HID: wiimote: Simplify synchronization The new locking scheme in HID core allows us to remove a bit of synchronization. Since the HID layer acts synchronously we simply register input core last and there are no synchonization issues anymore. Also register sysfs files after that to simplify the code. Signed-off-by: David Herrmann Signed-off-by: Jiri Kosina --- drivers/hid/hid-wiimote.c | 78 ++++++++++++++++------------------------------- 1 file changed, 27 insertions(+), 51 deletions(-) diff --git a/drivers/hid/hid-wiimote.c b/drivers/hid/hid-wiimote.c index a594383..8a68bf5 100644 --- a/drivers/hid/hid-wiimote.c +++ b/drivers/hid/hid-wiimote.c @@ -10,7 +10,6 @@ * any later version. */ -#include #include #include #include @@ -33,7 +32,6 @@ struct wiimote_state { }; struct wiimote_data { - atomic_t ready; struct hid_device *hdev; struct input_dev *input; @@ -200,9 +198,6 @@ static ssize_t wiifs_led_show_##num(struct device *dev, \ unsigned long flags; \ int state; \ \ - if (!atomic_read(&wdata->ready)) \ - return -EBUSY; \ - \ spin_lock_irqsave(&wdata->state.lock, flags); \ state = !!(wdata->state.flags & WIIPROTO_FLAG_LED##num); \ spin_unlock_irqrestore(&wdata->state.lock, flags); \ @@ -217,9 +212,6 @@ static ssize_t wiifs_led_set_##num(struct device *dev, \ unsigned long flags; \ __u8 state; \ \ - if (!atomic_read(&wdata->ready)) \ - return -EBUSY; \ - \ spin_lock_irqsave(&wdata->state.lock, flags); \ \ state = wdata->state.flags; \ @@ -244,13 +236,6 @@ wiifs_led_show_set(4); static int wiimote_input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { - struct wiimote_data *wdata = input_get_drvdata(dev); - - if (!atomic_read(&wdata->ready)) - return -EBUSY; - /* smp_rmb: Make sure wdata->xy is available when wdata->ready is 1 */ - smp_rmb(); - return 0; } @@ -300,11 +285,6 @@ static int wiimote_hid_event(struct hid_device *hdev, struct hid_report *report, int i; unsigned long flags; - if (!atomic_read(&wdata->ready)) - return -EBUSY; - /* smp_rmb: Make sure wdata->xy is available when wdata->ready is 1 */ - smp_rmb(); - if (size < 1) return -EINVAL; @@ -362,6 +342,15 @@ static struct wiimote_data *wiimote_create(struct hid_device *hdev) static void wiimote_destroy(struct wiimote_data *wdata) { + device_remove_file(&wdata->hdev->dev, &dev_attr_led1); + device_remove_file(&wdata->hdev->dev, &dev_attr_led2); + device_remove_file(&wdata->hdev->dev, &dev_attr_led3); + device_remove_file(&wdata->hdev->dev, &dev_attr_led4); + + input_unregister_device(wdata->input); + cancel_work_sync(&wdata->worker); + hid_hw_stop(wdata->hdev); + kfree(wdata); } @@ -377,19 +366,6 @@ static int wiimote_hid_probe(struct hid_device *hdev, return -ENOMEM; } - ret = device_create_file(&hdev->dev, &dev_attr_led1); - if (ret) - goto err; - ret = device_create_file(&hdev->dev, &dev_attr_led2); - if (ret) - goto err; - ret = device_create_file(&hdev->dev, &dev_attr_led3); - if (ret) - goto err; - ret = device_create_file(&hdev->dev, &dev_attr_led4); - if (ret) - goto err; - ret = hid_parse(hdev); if (ret) { hid_err(hdev, "HID parse failed\n"); @@ -408,9 +384,19 @@ static int wiimote_hid_probe(struct hid_device *hdev, goto err_stop; } - /* smp_wmb: Write wdata->xy first before wdata->ready is set to 1 */ - smp_wmb(); - atomic_set(&wdata->ready, 1); + ret = device_create_file(&hdev->dev, &dev_attr_led1); + if (ret) + goto err_free; + ret = device_create_file(&hdev->dev, &dev_attr_led2); + if (ret) + goto err_free; + ret = device_create_file(&hdev->dev, &dev_attr_led3); + if (ret) + goto err_free; + ret = device_create_file(&hdev->dev, &dev_attr_led4); + if (ret) + goto err_free; + hid_info(hdev, "New device registered\n"); /* by default set led1 after device initialization */ @@ -420,15 +406,15 @@ static int wiimote_hid_probe(struct hid_device *hdev, return 0; +err_free: + wiimote_destroy(wdata); + return ret; + err_stop: hid_hw_stop(hdev); err: input_free_device(wdata->input); - device_remove_file(&hdev->dev, &dev_attr_led1); - device_remove_file(&hdev->dev, &dev_attr_led2); - device_remove_file(&hdev->dev, &dev_attr_led3); - device_remove_file(&hdev->dev, &dev_attr_led4); - wiimote_destroy(wdata); + kfree(wdata); return ret; } @@ -437,16 +423,6 @@ static void wiimote_hid_remove(struct hid_device *hdev) struct wiimote_data *wdata = hid_get_drvdata(hdev); hid_info(hdev, "Device removed\n"); - - device_remove_file(&hdev->dev, &dev_attr_led1); - device_remove_file(&hdev->dev, &dev_attr_led2); - device_remove_file(&hdev->dev, &dev_attr_led3); - device_remove_file(&hdev->dev, &dev_attr_led4); - - hid_hw_stop(hdev); - input_unregister_device(wdata->input); - - cancel_work_sync(&wdata->worker); wiimote_destroy(wdata); } -- cgit v1.1 From 26af17484a737aaa991a7ce578cb15809a582fbc Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Wed, 17 Aug 2011 11:43:21 +0200 Subject: HID: wiimote: Correctly call HID open/close callbacks Even though the bluetooth hid backend does not react on open/close callbacks, we should call them to be consistent with other hid drivers. Also the new input open/close handlers will be used in future to prepare the wiimote device for IR/extension input. Signed-off-by: David Herrmann Signed-off-by: Jiri Kosina --- drivers/hid/hid-wiimote.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/hid/hid-wiimote.c b/drivers/hid/hid-wiimote.c index 8a68bf5..d49f67c 100644 --- a/drivers/hid/hid-wiimote.c +++ b/drivers/hid/hid-wiimote.c @@ -239,6 +239,20 @@ static int wiimote_input_event(struct input_dev *dev, unsigned int type, return 0; } +static int wiimote_input_open(struct input_dev *dev) +{ + struct wiimote_data *wdata = input_get_drvdata(dev); + + return hid_hw_open(wdata->hdev); +} + +static void wiimote_input_close(struct input_dev *dev) +{ + struct wiimote_data *wdata = input_get_drvdata(dev); + + hid_hw_close(wdata->hdev); +} + static void handler_keys(struct wiimote_data *wdata, const __u8 *payload) { input_report_key(wdata->input, wiiproto_keymap[WIIPROTO_KEY_LEFT], @@ -321,6 +335,8 @@ static struct wiimote_data *wiimote_create(struct hid_device *hdev) input_set_drvdata(wdata->input, wdata); wdata->input->event = wiimote_input_event; + wdata->input->open = wiimote_input_open; + wdata->input->close = wiimote_input_close; wdata->input->dev.parent = &wdata->hdev->dev; wdata->input->id.bustype = wdata->hdev->bus; wdata->input->id.vendor = wdata->hdev->vendor; -- cgit v1.1 From 23a5a4a39eddbe515a832767a371cc54e82cc25e Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Wed, 17 Aug 2011 11:43:22 +0200 Subject: HID: wiimote: Register led class devices This registers 4 led devices to allow controlling the wiimote leds via standard LED sysfs API. It removes the four sysfs attributes so we don't have two APIs for one device. Signed-off-by: David Herrmann Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 1 + drivers/hid/hid-wiimote.c | 165 +++++++++++++++++++++++++++++----------------- 2 files changed, 107 insertions(+), 59 deletions(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 306b15f..1130a89 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -589,6 +589,7 @@ config HID_WACOM_POWER_SUPPLY config HID_WIIMOTE tristate "Nintendo Wii Remote support" depends on BT_HIDP + depends on LEDS_CLASS ---help--- Support for the Nintendo Wii Remote bluetooth device. diff --git a/drivers/hid/hid-wiimote.c b/drivers/hid/hid-wiimote.c index d49f67c..29edd55 100644 --- a/drivers/hid/hid-wiimote.c +++ b/drivers/hid/hid-wiimote.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include "hid-ids.h" @@ -34,6 +35,7 @@ struct wiimote_state { struct wiimote_data { struct hid_device *hdev; struct input_dev *input; + struct led_classdev *leds[4]; spinlock_t qlock; __u8 head; @@ -51,6 +53,9 @@ struct wiimote_data { #define WIIPROTO_FLAGS_LEDS (WIIPROTO_FLAG_LED1 | WIIPROTO_FLAG_LED2 | \ WIIPROTO_FLAG_LED3 | WIIPROTO_FLAG_LED4) +/* return flag for led \num */ +#define WIIPROTO_FLAG_LED(num) (WIIPROTO_FLAG_LED1 << (num - 1)) + enum wiiproto_reqs { WIIPROTO_REQ_LED = 0x11, WIIPROTO_REQ_DRM_K = 0x30, @@ -85,9 +90,6 @@ static __u16 wiiproto_keymap[] = { BTN_MODE, /* WIIPROTO_KEY_HOME */ }; -#define dev_to_wii(pdev) hid_get_drvdata(container_of(pdev, struct hid_device, \ - dev)) - static ssize_t wiimote_hid_send(struct hid_device *hdev, __u8 *buffer, size_t count) { @@ -190,48 +192,53 @@ static void wiiproto_req_leds(struct wiimote_data *wdata, int leds) wiimote_queue(wdata, cmd, sizeof(cmd)); } -#define wiifs_led_show_set(num) \ -static ssize_t wiifs_led_show_##num(struct device *dev, \ - struct device_attribute *attr, char *buf) \ -{ \ - struct wiimote_data *wdata = dev_to_wii(dev); \ - unsigned long flags; \ - int state; \ - \ - spin_lock_irqsave(&wdata->state.lock, flags); \ - state = !!(wdata->state.flags & WIIPROTO_FLAG_LED##num); \ - spin_unlock_irqrestore(&wdata->state.lock, flags); \ - \ - return sprintf(buf, "%d\n", state); \ -} \ -static ssize_t wiifs_led_set_##num(struct device *dev, \ - struct device_attribute *attr, const char *buf, size_t count) \ -{ \ - struct wiimote_data *wdata = dev_to_wii(dev); \ - int tmp = simple_strtoul(buf, NULL, 10); \ - unsigned long flags; \ - __u8 state; \ - \ - spin_lock_irqsave(&wdata->state.lock, flags); \ - \ - state = wdata->state.flags; \ - \ - if (tmp) \ - wiiproto_req_leds(wdata, state | WIIPROTO_FLAG_LED##num);\ - else \ - wiiproto_req_leds(wdata, state & ~WIIPROTO_FLAG_LED##num);\ - \ - spin_unlock_irqrestore(&wdata->state.lock, flags); \ - \ - return count; \ -} \ -static DEVICE_ATTR(led##num, S_IRUGO | S_IWUSR, wiifs_led_show_##num, \ - wiifs_led_set_##num) - -wiifs_led_show_set(1); -wiifs_led_show_set(2); -wiifs_led_show_set(3); -wiifs_led_show_set(4); +static enum led_brightness wiimote_leds_get(struct led_classdev *led_dev) +{ + struct wiimote_data *wdata; + struct device *dev = led_dev->dev->parent; + int i; + unsigned long flags; + bool value = false; + + wdata = hid_get_drvdata(container_of(dev, struct hid_device, dev)); + + for (i = 0; i < 4; ++i) { + if (wdata->leds[i] == led_dev) { + spin_lock_irqsave(&wdata->state.lock, flags); + value = wdata->state.flags & WIIPROTO_FLAG_LED(i + 1); + spin_unlock_irqrestore(&wdata->state.lock, flags); + break; + } + } + + return value ? LED_FULL : LED_OFF; +} + +static void wiimote_leds_set(struct led_classdev *led_dev, + enum led_brightness value) +{ + struct wiimote_data *wdata; + struct device *dev = led_dev->dev->parent; + int i; + unsigned long flags; + __u8 state, flag; + + wdata = hid_get_drvdata(container_of(dev, struct hid_device, dev)); + + for (i = 0; i < 4; ++i) { + if (wdata->leds[i] == led_dev) { + flag = WIIPROTO_FLAG_LED(i + 1); + spin_lock_irqsave(&wdata->state.lock, flags); + state = wdata->state.flags; + if (value == LED_OFF) + wiiproto_req_leds(wdata, state & ~flag); + else + wiiproto_req_leds(wdata, state | flag); + spin_unlock_irqrestore(&wdata->state.lock, flags); + break; + } + } +} static int wiimote_input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) @@ -315,6 +322,58 @@ static int wiimote_hid_event(struct hid_device *hdev, struct hid_report *report, return 0; } +static void wiimote_leds_destroy(struct wiimote_data *wdata) +{ + int i; + struct led_classdev *led; + + for (i = 0; i < 4; ++i) { + if (wdata->leds[i]) { + led = wdata->leds[i]; + wdata->leds[i] = NULL; + led_classdev_unregister(led); + kfree(led); + } + } +} + +static int wiimote_leds_create(struct wiimote_data *wdata) +{ + int i, ret; + struct device *dev = &wdata->hdev->dev; + size_t namesz = strlen(dev_name(dev)) + 9; + struct led_classdev *led; + char *name; + + for (i = 0; i < 4; ++i) { + led = kzalloc(sizeof(struct led_classdev) + namesz, GFP_KERNEL); + if (!led) { + ret = -ENOMEM; + goto err; + } + name = (void*)&led[1]; + snprintf(name, namesz, "%s:blue:p%d", dev_name(dev), i); + led->name = name; + led->brightness = 0; + led->max_brightness = 1; + led->brightness_get = wiimote_leds_get; + led->brightness_set = wiimote_leds_set; + + ret = led_classdev_register(dev, led); + if (ret) { + kfree(led); + goto err; + } + wdata->leds[i] = led; + } + + return 0; + +err: + wiimote_leds_destroy(wdata); + return ret; +} + static struct wiimote_data *wiimote_create(struct hid_device *hdev) { struct wiimote_data *wdata; @@ -358,10 +417,7 @@ static struct wiimote_data *wiimote_create(struct hid_device *hdev) static void wiimote_destroy(struct wiimote_data *wdata) { - device_remove_file(&wdata->hdev->dev, &dev_attr_led1); - device_remove_file(&wdata->hdev->dev, &dev_attr_led2); - device_remove_file(&wdata->hdev->dev, &dev_attr_led3); - device_remove_file(&wdata->hdev->dev, &dev_attr_led4); + wiimote_leds_destroy(wdata); input_unregister_device(wdata->input); cancel_work_sync(&wdata->worker); @@ -400,16 +456,7 @@ static int wiimote_hid_probe(struct hid_device *hdev, goto err_stop; } - ret = device_create_file(&hdev->dev, &dev_attr_led1); - if (ret) - goto err_free; - ret = device_create_file(&hdev->dev, &dev_attr_led2); - if (ret) - goto err_free; - ret = device_create_file(&hdev->dev, &dev_attr_led3); - if (ret) - goto err_free; - ret = device_create_file(&hdev->dev, &dev_attr_led4); + ret = wiimote_leds_create(wdata); if (ret) goto err_free; -- cgit v1.1 From 2cb5e4bc530471e9596cd32390bf70c8ada13d9a Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Wed, 17 Aug 2011 11:43:23 +0200 Subject: HID: wiimote: Add drm request The wiimote reports data in several data reporting modes (DRM). The DRM request makes the wiimote send data in the requested drm. The DRM mode can be set explicitely or can be chosen by the driver. To let the driver choose the DRM mode, pass WIIPROTO_REQ_NULL placeholder to it. This is no valid request and is replaced with an appropriate DRM. Currently, the driver always sets the basic DRM_K mode, but this will be extended when further peripherals like accelerometer and IR are supported. Signed-off-by: David Herrmann Signed-off-by: Jiri Kosina --- drivers/hid/hid-wiimote.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/hid/hid-wiimote.c b/drivers/hid/hid-wiimote.c index 29edd55..84c9eb9 100644 --- a/drivers/hid/hid-wiimote.c +++ b/drivers/hid/hid-wiimote.c @@ -57,7 +57,9 @@ struct wiimote_data { #define WIIPROTO_FLAG_LED(num) (WIIPROTO_FLAG_LED1 << (num - 1)) enum wiiproto_reqs { + WIIPROTO_REQ_NULL = 0x0, WIIPROTO_REQ_LED = 0x11, + WIIPROTO_REQ_DRM = 0x12, WIIPROTO_REQ_DRM_K = 0x30, }; @@ -192,6 +194,30 @@ static void wiiproto_req_leds(struct wiimote_data *wdata, int leds) wiimote_queue(wdata, cmd, sizeof(cmd)); } +/* + * Check what peripherals of the wiimote are currently + * active and select a proper DRM that supports all of + * the requested data inputs. + */ +static __u8 select_drm(struct wiimote_data *wdata) +{ + return WIIPROTO_REQ_DRM_K; +} + +static void wiiproto_req_drm(struct wiimote_data *wdata, __u8 drm) +{ + __u8 cmd[3]; + + if (drm == WIIPROTO_REQ_NULL) + drm = select_drm(wdata); + + cmd[0] = WIIPROTO_REQ_DRM; + cmd[1] = 0; + cmd[2] = drm; + + wiimote_queue(wdata, cmd, sizeof(cmd)); +} + static enum led_brightness wiimote_leds_get(struct led_classdev *led_dev) { struct wiimote_data *wdata; -- cgit v1.1 From c87019e41d61f3f972bd2f6a2380fc9896e4ab74 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Wed, 17 Aug 2011 11:43:24 +0200 Subject: HID: wiimote: Add status and return request handlers The wiimote resets the current drm when an extension is plugged in. Fortunately, it also sends a status report in this situation so we just reset the drm on every status report to keep the drm consistent. Also handle return reports from the wiimote which indicate success and failure of requests that we've sent. Signed-off-by: David Herrmann Signed-off-by: Jiri Kosina --- drivers/hid/hid-wiimote.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/hid/hid-wiimote.c b/drivers/hid/hid-wiimote.c index 84c9eb9..85a02e5 100644 --- a/drivers/hid/hid-wiimote.c +++ b/drivers/hid/hid-wiimote.c @@ -60,6 +60,8 @@ enum wiiproto_reqs { WIIPROTO_REQ_NULL = 0x0, WIIPROTO_REQ_LED = 0x11, WIIPROTO_REQ_DRM = 0x12, + WIIPROTO_REQ_STATUS = 0x20, + WIIPROTO_REQ_RETURN = 0x22, WIIPROTO_REQ_DRM_K = 0x30, }; @@ -313,6 +315,26 @@ static void handler_keys(struct wiimote_data *wdata, const __u8 *payload) input_sync(wdata->input); } +static void handler_status(struct wiimote_data *wdata, const __u8 *payload) +{ + handler_keys(wdata, payload); + + /* on status reports the drm is reset so we need to resend the drm */ + wiiproto_req_drm(wdata, WIIPROTO_REQ_NULL); +} + +static void handler_return(struct wiimote_data *wdata, const __u8 *payload) +{ + __u8 err = payload[3]; + __u8 cmd = payload[2]; + + handler_keys(wdata, payload); + + if (err) + hid_warn(wdata->hdev, "Remote error %hhu on req %hhu\n", err, + cmd); +} + struct wiiproto_handler { __u8 id; size_t size; @@ -320,6 +342,8 @@ struct wiiproto_handler { }; static struct wiiproto_handler handlers[] = { + { .id = WIIPROTO_REQ_STATUS, .size = 6, .func = handler_status }, + { .id = WIIPROTO_REQ_RETURN, .size = 4, .func = handler_return }, { .id = WIIPROTO_REQ_DRM_K, .size = 2, .func = handler_keys }, { .id = 0 } }; -- cgit v1.1 From 675c1aa3c4a7290e537e854d0af7cdf9692bd396 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 23 Aug 2011 12:36:28 +0200 Subject: ALSA: hda - Fix output-path initialization for Realtek auto-parser When the headphone or speaker output has no own DAC, initialize the path using the primary DAC. Otherwise the path won't be set properly and can result in the silence. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fcb11af..0fefc10 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3083,16 +3083,22 @@ static void alc_auto_init_multi_out(struct hda_codec *codec) static void alc_auto_init_extra_out(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t pin; + hda_nid_t pin, dac; pin = spec->autocfg.hp_pins[0]; - if (pin) - alc_auto_set_output_and_unmute(codec, pin, PIN_HP, - spec->multiout.hp_nid); + if (pin) { + dac = spec->multiout.hp_nid; + if (!dac) + dac = spec->multiout.dac_nids[0]; + alc_auto_set_output_and_unmute(codec, pin, PIN_HP, dac); + } pin = spec->autocfg.speaker_pins[0]; - if (pin) - alc_auto_set_output_and_unmute(codec, pin, PIN_OUT, - spec->multiout.extra_out_nid[0]); + if (pin) { + dac = spec->multiout.extra_out_nid[0]; + if (!dac) + dac = spec->multiout.dac_nids[0]; + alc_auto_set_output_and_unmute(codec, pin, PIN_OUT, dac); + } } /* -- cgit v1.1 From 3c715a98844f72cec0fa3ef2b68232b8f751468b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 23 Aug 2011 12:41:09 +0200 Subject: ALSA: hda - Update jack-sense info even when no automute is set The internal states, jack_present and line_jack_present should be updated upon unsolicited events even if no automute is set. Otherwise the wrong state is referred when the automute behavior is changed by the mixer control. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0fefc10..7cabd73 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -565,11 +565,11 @@ static void alc_hp_automute(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - if (!spec->automute) - return; spec->jack_present = detect_jacks(codec, ARRAY_SIZE(spec->autocfg.hp_pins), spec->autocfg.hp_pins); + if (!spec->automute) + return; update_speakers(codec); } @@ -578,11 +578,11 @@ static void alc_line_automute(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - if (!spec->automute || !spec->detect_line) - return; spec->line_jack_present = detect_jacks(codec, ARRAY_SIZE(spec->autocfg.line_out_pins), spec->autocfg.line_out_pins); + if (!spec->automute || !spec->detect_line) + return; update_speakers(codec); } -- cgit v1.1 From f2b60717e692550bf753a5d64a5b69ea430fc832 Mon Sep 17 00:00:00 2001 From: Thomas Reim Date: Wed, 17 Aug 2011 09:03:32 +0000 Subject: drm/radeon: Extended DDC Probing for Toshiba L300D Radeon Mobility X1100 HDMI-A Connector Toshiba Satellite L300D with ATI Mobility Radeon X1100 sends data to i2c bus for a HDMI connector that is not implemented/existent on the notebook's board. Fix by applying extented DDC probing for this connector. Requires [PATCH] drm/radeon: Extended DDC Probing for Connectors with Improperly Wired DDC Lines Tested for kernel 2.6.38 on Toshiba Satellite L300D notebook BugLink: http://bugs.launchpad.net/bugs/826677 Signed-off-by: Thomas Reim Acked-by: Chris Routh Cc: Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_connectors.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 7f65940..4f0c1ec 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -466,6 +466,16 @@ static bool radeon_connector_needs_extended_probe(struct radeon_device *dev, (supported_device == ATOM_DEVICE_DFP2_SUPPORT)) return true; } + /* TOSHIBA Satellite L300D with ATI Mobility Radeon x1100 + * (RS690M) sends data to i2c bus for a HDMI connector that + * is not implemented */ + if ((dev->pdev->device == 0x791f) && + (dev->pdev->subsystem_vendor == 0x1179) && + (dev->pdev->subsystem_device == 0xff68)) { + if ((connector_type == DRM_MODE_CONNECTOR_HDMIA) && + (supported_device == ATOM_DEVICE_DFP2_SUPPORT)) + return true; + } /* Default: no EDID header probe required for DDC probing */ return false; -- cgit v1.1 From 5dc06c5a70b79a323152bec7e55783e705767e63 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 Aug 2011 14:49:55 +0200 Subject: block: remove READ_META and WRITE_META Replace all occurnanced of the undocumented READ_META with READ | REQ_META and remove the unused WRITE_META define. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/ext3/inode.c | 4 ++-- fs/ext3/namei.c | 2 +- fs/ext4/inode.c | 4 ++-- fs/ext4/namei.c | 2 +- fs/gfs2/quota.c | 2 +- include/linux/fs.h | 2 -- 6 files changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 04da6ac..bba7b96 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1134,7 +1134,7 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, return bh; if (buffer_uptodate(bh)) return bh; - ll_rw_block(READ_META, 1, &bh); + ll_rw_block(READ | REQ_META, 1, &bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; @@ -2807,7 +2807,7 @@ make_io: trace_ext3_load_inode(inode); get_bh(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(READ_META, bh); + submit_bh(READ | REQ_META, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { ext3_error(inode->i_sb, "ext3_get_inode_loc", diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 5571708..c7d4032 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -922,7 +922,7 @@ restart: bh = ext3_getblk(NULL, dir, b++, 0, &err); bh_use[ra_max] = bh; if (bh) - ll_rw_block(READ_META, 1, &bh); + ll_rw_block(READ | REQ_META, 1, &bh); } } if ((bh = bh_use[ra_ptr++]) == NULL) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c4da98a..1dfa18f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -650,7 +650,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, return bh; if (buffer_uptodate(bh)) return bh; - ll_rw_block(READ_META, 1, &bh); + ll_rw_block(READ | REQ_META, 1, &bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; @@ -3301,7 +3301,7 @@ make_io: trace_ext4_load_inode(inode); get_bh(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(READ_META, bh); + submit_bh(READ | REQ_META, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { EXT4_ERROR_INODE_BLOCK(inode, block, diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index f8068c7..d36315a 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -922,7 +922,7 @@ restart: bh = ext4_getblk(NULL, dir, b++, 0, &err); bh_use[ra_max] = bh; if (bh) - ll_rw_block(READ_META, 1, &bh); + ll_rw_block(READ | REQ_META, 1, &bh); } } if ((bh = bh_use[ra_ptr++]) == NULL) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 42e8d23..0534340 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -709,7 +709,7 @@ get_a_page: set_buffer_uptodate(bh); if (!buffer_uptodate(bh)) { - ll_rw_block(READ_META, 1, &bh); + ll_rw_block(READ | REQ_META, 1, &bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) goto unlock_out; diff --git a/include/linux/fs.h b/include/linux/fs.h index 178cdb4..eae44c9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -162,10 +162,8 @@ struct inodes_stat_t { #define READA RWA_MASK #define READ_SYNC (READ | REQ_SYNC) -#define READ_META (READ | REQ_META) #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE) #define WRITE_ODIRECT (WRITE | REQ_SYNC) -#define WRITE_META (WRITE | REQ_META) #define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH) #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA) #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) -- cgit v1.1 From 65299a3b788bd274bed92f9fa3232082c9f3ea70 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 Aug 2011 14:50:29 +0200 Subject: block: separate priority boosting from REQ_META Add a new REQ_PRIO to let requests preempt others in the cfq I/O schedule, and lave REQ_META purely for marking requests as metadata in blktrace. All existing callers of REQ_META except for XFS are updated to also set REQ_PRIO for now. Signed-off-by: Christoph Hellwig Reviewed-by: Namhyung Kim Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 20 ++++++++++---------- drivers/mmc/card/block.c | 3 +++ fs/ext3/inode.c | 4 ++-- fs/ext3/namei.c | 3 ++- fs/ext4/inode.c | 4 ++-- fs/ext4/namei.c | 3 ++- fs/gfs2/log.c | 4 ++-- fs/gfs2/meta_io.c | 6 +++--- fs/gfs2/ops_fstype.c | 2 +- fs/gfs2/quota.c | 2 +- include/linux/blk_types.h | 6 ++++-- 11 files changed, 32 insertions(+), 25 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index a33bd43..16ace89 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -130,8 +130,8 @@ struct cfq_queue { unsigned long slice_end; long slice_resid; - /* pending metadata requests */ - int meta_pending; + /* pending priority requests */ + int prio_pending; /* number of requests that are on the dispatch list or inside driver */ int dispatched; @@ -684,8 +684,8 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, if (rq_is_sync(rq1) != rq_is_sync(rq2)) return rq_is_sync(rq1) ? rq1 : rq2; - if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META) - return rq1->cmd_flags & REQ_META ? rq1 : rq2; + if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_PRIO) + return rq1->cmd_flags & REQ_PRIO ? rq1 : rq2; s1 = blk_rq_pos(rq1); s2 = blk_rq_pos(rq2); @@ -1612,9 +1612,9 @@ static void cfq_remove_request(struct request *rq) cfqq->cfqd->rq_queued--; cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq), rq_is_sync(rq)); - if (rq->cmd_flags & REQ_META) { - WARN_ON(!cfqq->meta_pending); - cfqq->meta_pending--; + if (rq->cmd_flags & REQ_PRIO) { + WARN_ON(!cfqq->prio_pending); + cfqq->prio_pending--; } } @@ -3372,7 +3372,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, * So both queues are sync. Let the new request get disk time if * it's a metadata request and the current queue is doing regular IO. */ - if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending) + if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending) return true; /* @@ -3439,8 +3439,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_io_context *cic = RQ_CIC(rq); cfqd->rq_queued++; - if (rq->cmd_flags & REQ_META) - cfqq->meta_pending++; + if (rq->cmd_flags & REQ_PRIO) + cfqq->prio_pending++; cfq_update_io_thinktime(cfqd, cfqq, cic); cfq_update_io_seektime(cfqd, cfqq, rq); diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 1ff5486..4c1a648 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -926,6 +926,9 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, /* * Reliable writes are used to implement Forced Unit Access and * REQ_META accesses, and are supported only on MMCs. + * + * XXX: this really needs a good explanation of why REQ_META + * is treated special. */ bool do_rel_wr = ((req->cmd_flags & REQ_FUA) || (req->cmd_flags & REQ_META)) && diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index bba7b96..12661e1 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1134,7 +1134,7 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, return bh; if (buffer_uptodate(bh)) return bh; - ll_rw_block(READ | REQ_META, 1, &bh); + ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; @@ -2807,7 +2807,7 @@ make_io: trace_ext3_load_inode(inode); get_bh(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(READ | REQ_META, bh); + submit_bh(READ | REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { ext3_error(inode->i_sb, "ext3_get_inode_loc", diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index c7d4032..0629e09 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -922,7 +922,8 @@ restart: bh = ext3_getblk(NULL, dir, b++, 0, &err); bh_use[ra_max] = bh; if (bh) - ll_rw_block(READ | REQ_META, 1, &bh); + ll_rw_block(READ | REQ_META | REQ_PRIO, + 1, &bh); } } if ((bh = bh_use[ra_ptr++]) == NULL) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1dfa18f..c7cbb3d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -650,7 +650,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, return bh; if (buffer_uptodate(bh)) return bh; - ll_rw_block(READ | REQ_META, 1, &bh); + ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; @@ -3301,7 +3301,7 @@ make_io: trace_ext4_load_inode(inode); get_bh(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(READ | REQ_META, bh); + submit_bh(READ | REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { EXT4_ERROR_INODE_BLOCK(inode, block, diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index d36315a..1c924fa 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -922,7 +922,8 @@ restart: bh = ext4_getblk(NULL, dir, b++, 0, &err); bh_use[ra_max] = bh; if (bh) - ll_rw_block(READ | REQ_META, 1, &bh); + ll_rw_block(READ | REQ_META | REQ_PRIO, + 1, &bh); } } if ((bh = bh_use[ra_ptr++]) == NULL) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 85c6292..5986464 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -624,9 +624,9 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) bh->b_end_io = end_buffer_write_sync; get_bh(bh); if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) - submit_bh(WRITE_SYNC | REQ_META, bh); + submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh); else - submit_bh(WRITE_FLUSH_FUA | REQ_META, bh); + submit_bh(WRITE_FLUSH_FUA | REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 747238c..be29858 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -37,7 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb { struct buffer_head *bh, *head; int nr_underway = 0; - int write_op = REQ_META | + int write_op = REQ_META | REQ_PRIO | (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); BUG_ON(!PageLocked(page)); @@ -225,7 +225,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, } bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(READ_SYNC | REQ_META, bh); + submit_bh(READ_SYNC | REQ_META | REQ_PRIO, bh); if (!(flags & DIO_WAIT)) return 0; @@ -435,7 +435,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) if (buffer_uptodate(first_bh)) goto out; if (!buffer_locked(first_bh)) - ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh); + ll_rw_block(READ_SYNC | REQ_META | REQ_PRIO, 1, &first_bh); dblock++; extlen--; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 3bc073a..079587e 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -224,7 +224,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) bio->bi_end_io = end_bio_io_page; bio->bi_private = page; - submit_bio(READ_SYNC | REQ_META, bio); + submit_bio(READ_SYNC | REQ_META | REQ_PRIO, bio); wait_on_page_locked(page); bio_put(bio); if (!PageUptodate(page)) { diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 0534340..0e8bb13 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -709,7 +709,7 @@ get_a_page: set_buffer_uptodate(bh); if (!buffer_uptodate(bh)) { - ll_rw_block(READ | REQ_META, 1, &bh); + ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) goto unlock_out; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 32f0076..71fc53b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -124,6 +124,7 @@ enum rq_flag_bits { __REQ_SYNC, /* request is sync (sync write or read) */ __REQ_META, /* metadata io request */ + __REQ_PRIO, /* boost priority in cfq */ __REQ_DISCARD, /* request to discard sectors */ __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ @@ -161,14 +162,15 @@ enum rq_flag_bits { #define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER) #define REQ_SYNC (1 << __REQ_SYNC) #define REQ_META (1 << __REQ_META) +#define REQ_PRIO (1 << __REQ_PRIO) #define REQ_DISCARD (1 << __REQ_DISCARD) #define REQ_NOIDLE (1 << __REQ_NOIDLE) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ - (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_DISCARD | \ - REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE) + (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \ + REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE) #define REQ_CLONE_MASK REQ_COMMON_MASK #define REQ_RAHEAD (1 << __REQ_RAHEAD) -- cgit v1.1 From 1f015f5fdc4003f3f2a7c66efdb1acf7a2d230bf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 23 Aug 2011 14:57:08 +0200 Subject: ALSA: hda - Fix double-headphone/speaker paths for Cxt auto-parser When multiple headphones or speakers are assigned but no individual DACs are available, the driver should take the first HP/SPK DAC instead of another primary output. The patch adds a bit-flag to dac field of struct pin_dac_pair indicating that it's a slave DAC. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 502fc94..4c462c3 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -3348,6 +3348,8 @@ static hda_nid_t get_unassigned_dac(struct hda_codec *codec, hda_nid_t pin, #define MAX_AUTO_DACS 5 +#define DAC_SLAVE_FLAG 0x8000 /* filled dac is a slave */ + /* fill analog DAC list from the widget tree */ static int fill_cx_auto_dacs(struct hda_codec *codec, hda_nid_t *dacs) { @@ -3379,6 +3381,8 @@ static int fill_dacs_for_pins(struct hda_codec *codec, hda_nid_t *pins, filled[nums].pin = pins[i]; filled[nums].type = type; filled[nums].dac = get_unassigned_dac(codec, pins[i], dacs, rest); + if (!filled[nums].dac && i > 0 && filled[0].dac) + filled[nums].dac = filled[0].dac | DAC_SLAVE_FLAG; nums++; } return nums; @@ -3407,7 +3411,7 @@ static void cx_auto_parse_output(struct hda_codec *codec) /* fill multiout struct */ for (i = 0; i < nums; i++) { hda_nid_t dac = spec->dac_info[i].dac; - if (!dac) + if (!dac || (dac & DAC_SLAVE_FLAG)) continue; switch (spec->dac_info[i].type) { case AUTO_PIN_LINE_OUT: @@ -4035,6 +4039,8 @@ static void cx_auto_init_output(struct hda_codec *codec) nid = spec->dac_info[i].dac; if (!nid) nid = spec->multiout.dac_nids[0]; + else if (nid & DAC_SLAVE_FLAG) + nid &= ~DAC_SLAVE_FLAG; select_connection(codec, spec->dac_info[i].pin, nid); } if (spec->auto_mute) { @@ -4191,7 +4197,8 @@ static int cx_auto_build_output_controls(struct hda_codec *codec) for (i = 0; i < spec->dac_info_filled; i++) { const char *label; int idx, type; - if (!spec->dac_info[i].dac) + hda_nid_t dac = spec->dac_info[i].dac; + if (!dac || (dac & DAC_SLAVE_FLAG)) continue; type = spec->dac_info[i].type; if (type == AUTO_PIN_LINE_OUT) @@ -4211,7 +4218,7 @@ static int cx_auto_build_output_controls(struct hda_codec *codec) idx = num_spk++; break; } - err = try_add_pb_volume(codec, spec->dac_info[i].dac, + err = try_add_pb_volume(codec, dac, spec->dac_info[i].pin, label, idx); if (err < 0) -- cgit v1.1 From 8c4074cd2254606aeb788d518ccc27c9f97129e1 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 1 Aug 2011 21:20:10 +0800 Subject: tty: Add "spi:" prefix for spi modalias Since commit e0626e38 (spi: prefix modalias with "spi:"), the spi modalias is prefixed with "spi:". This patch adds "spi:" prefix and removes "-spi" suffix in the modalias. Signed-off-by: Axel Lin Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max3107-aava.c | 2 +- drivers/tty/serial/max3107.c | 2 +- drivers/tty/serial/mrst_max3110.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/max3107-aava.c b/drivers/tty/serial/max3107-aava.c index a1fe304..d73aadd 100644 --- a/drivers/tty/serial/max3107-aava.c +++ b/drivers/tty/serial/max3107-aava.c @@ -340,5 +340,5 @@ module_exit(max3107_exit); MODULE_DESCRIPTION("MAX3107 driver"); MODULE_AUTHOR("Aavamobile"); -MODULE_ALIAS("aava-max3107-spi"); +MODULE_ALIAS("spi:aava-max3107"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/tty/serial/max3107.c b/drivers/tty/serial/max3107.c index 750b4f6..a816460 100644 --- a/drivers/tty/serial/max3107.c +++ b/drivers/tty/serial/max3107.c @@ -1209,5 +1209,5 @@ module_exit(max3107_exit); MODULE_DESCRIPTION("MAX3107 driver"); MODULE_AUTHOR("Aavamobile"); -MODULE_ALIAS("max3107-spi"); +MODULE_ALIAS("spi:max3107"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/tty/serial/mrst_max3110.c b/drivers/tty/serial/mrst_max3110.c index a764bf9..23bc743 100644 --- a/drivers/tty/serial/mrst_max3110.c +++ b/drivers/tty/serial/mrst_max3110.c @@ -917,4 +917,4 @@ module_init(serial_m3110_init); module_exit(serial_m3110_exit); MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("max3110-uart"); +MODULE_ALIAS("spi:max3110-uart"); -- cgit v1.1 From 44178176ecc55ad370b837dd2c4b4b8bed1e3823 Mon Sep 17 00:00:00 2001 From: Eric Smith Date: Mon, 11 Jul 2011 22:53:13 -0600 Subject: 8250_pci: add support for Rosewill RC-305 4x serial port card This patch adds support for the Rosewill RC-305 four-port PCI serial card, and probably any other four-port serial cards based on the Moschip MCS9865 chip, assuming that the EEPROM on the card was programmed in accordance with Table 6 of the MCS9865 EEPROM Application Note version 0.3 dated 16-May-2008, available from the Moschip web site (registration required). This patch is based on an earlier patch [1] for the SYBA 6x serial port card by Ira W. Snyder. [1]: http://www.gossamer-threads.com/lists/linux/kernel/1162435 Signed-off-by: Eric Smith Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250_pci.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250_pci.c b/drivers/tty/serial/8250_pci.c index 6b887d9..f652a7b 100644 --- a/drivers/tty/serial/8250_pci.c +++ b/drivers/tty/serial/8250_pci.c @@ -4021,7 +4021,7 @@ static struct pci_device_id serial_pci_tbl[] = { 0, 0, pbn_NETMOS9900_2s_115200 }, /* - * Best Connectivity PCI Multi I/O cards + * Best Connectivity and Rosewill PCI Multi I/O cards */ { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865, @@ -4029,6 +4029,10 @@ static struct pci_device_id serial_pci_tbl[] = { 0, 0, pbn_b0_1_115200 }, { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865, + 0xA000, 0x3002, + 0, 0, pbn_b0_bt_2_115200 }, + + { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865, 0xA000, 0x3004, 0, 0, pbn_b0_bt_4_115200 }, /* Intel CE4100 */ -- cgit v1.1 From dacacc3e794c4c5bab05d97afc19e372e1877943 Mon Sep 17 00:00:00 2001 From: Tomoya MORINAGA Date: Tue, 12 Jul 2011 16:08:49 +0900 Subject: serial/8250_pci: delete duplicate data definition Data definiton "VendorID=10DB, device_id=800D" is already defined. This patch deletes the duplicate definition. Signed-off-by: Tomoya MORINAGA Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250_pci.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/tty/serial/8250_pci.c b/drivers/tty/serial/8250_pci.c index f652a7b..3abeca2 100644 --- a/drivers/tty/serial/8250_pci.c +++ b/drivers/tty/serial/8250_pci.c @@ -1599,11 +1599,6 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .device = 0x800D, .init = pci_eg20t_init, }, - { - .vendor = 0x10DB, - .device = 0x800D, - .init = pci_eg20t_init, - }, /* * Cronyx Omega PCI (PLX-chip based) */ -- cgit v1.1 From dbb3b1ca5609d1f3848cd387d06cc60aaacf7f98 Mon Sep 17 00:00:00 2001 From: Al Cooper Date: Mon, 25 Jul 2011 16:19:52 -0400 Subject: 8250: Fix race condition in serial8250_backup_timeout(). This is to fix an issue where output will suddenly become very slow. The problem occurs on 8250 UARTS with the hardware bug UART_BUG_THRE. BACKGROUND For normal UARTs (without UART_BUG_THRE): When the serial core layer gets new transmit data and the transmitter is idle, it buffers the data and calls the 8250s' serial8250_start_tx() routine which will simply enable the TX interrupt in the IER register and return. This should immediately fire a THRE interrupt and begin transmitting the data. For buggy UARTs (with UART_BUG_THRE): merely enabling the TX interrupt in IER does not necessarily generate a new THRE interrupt. Therefore, a background timer periodically checks to see if there is pending data, and starts transmission if that is the case. The bug happens on SMP systems when the system has nothing to transmit, the transmit interrupt is disabled and the following sequence occurs: - CPU0: The background timer routine serial8250_backup_timeout() starts and saves the state of the interrupt enable register (IER) and then disables all interrupts in IER. NOTE: The transmit interrupt (TI) bit is saved as disabled. - CPU1: The serial core gets data to transmit, grabs the port lock and calls serial8250_start_tx() which enables the TI in IER. - CPU0: serial8250_backup_timeout() waits for the port lock. - CPU1: finishes (with TI enabled) and releases the port lock. - CPU0: serial8250_backup_timeout() calls the interrupt routine which will transmit the next fifo's worth of data and then restores the IER from the previously saved value (TI disabled). At this point, as long as the serial core has more transmit data buffered, it will not call serial8250_start_tx() again and the background timer routine will slowly transmit the data. The fix is to have serial8250_start_tx() get the port lock before it saves the IER state and release it after restoring IER. This will prevent serial8250_start_tx() from running in parallel. Signed-off-by: Al Cooper Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/8250.c b/drivers/tty/serial/8250.c index f2dfec8..7f50999 100644 --- a/drivers/tty/serial/8250.c +++ b/drivers/tty/serial/8250.c @@ -1819,6 +1819,8 @@ static void serial8250_backup_timeout(unsigned long data) unsigned int iir, ier = 0, lsr; unsigned long flags; + spin_lock_irqsave(&up->port.lock, flags); + /* * Must disable interrupts or else we risk racing with the interrupt * based handler. @@ -1836,10 +1838,8 @@ static void serial8250_backup_timeout(unsigned long data) * the "Diva" UART used on the management processor on many HP * ia64 and parisc boxes. */ - spin_lock_irqsave(&up->port.lock, flags); lsr = serial_in(up, UART_LSR); up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; - spin_unlock_irqrestore(&up->port.lock, flags); if ((iir & UART_IIR_NO_INT) && (up->ier & UART_IER_THRI) && (!uart_circ_empty(&up->port.state->xmit) || up->port.x_char) && (lsr & UART_LSR_THRE)) { @@ -1848,11 +1848,13 @@ static void serial8250_backup_timeout(unsigned long data) } if (!(iir & UART_IIR_NO_INT)) - serial8250_handle_port(up); + transmit_chars(up); if (is_real_interrupt(up->port.irq)) serial_out(up, UART_IER, ier); + spin_unlock_irqrestore(&up->port.lock, flags); + /* Standard timer interval plus 0.2s to keep the port running */ mod_timer(&up->timer, jiffies + uart_poll_timeout(&up->port) + HZ / 5); -- cgit v1.1 From 24d406a6bf736f7aebdc8fa0f0ec86e0890c6d24 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 10 Aug 2011 14:59:28 +0200 Subject: TTY: pty, fix pty counting tty_operations->remove is normally called like: queue_release_one_tty ->tty_shutdown ->tty_driver_remove_tty ->tty_operations->remove However tty_shutdown() is called from queue_release_one_tty() only if tty_operations->shutdown is NULL. But for pty, it is not. pty_unix98_shutdown() is used there as ->shutdown. So tty_operations->remove of pty (i.e. pty_unix98_remove()) is never called. This results in invalid pty_count. I.e. what can be seen in /proc/sys/kernel/pty/nr. I see this was already reported at: https://lkml.org/lkml/2009/11/5/370 But it was not fixed since then. This patch is kind of a hackish way. The problem lies in ->install. We allocate there another tty (so-called tty->link). So ->install is called once, but ->remove twice, for both tty and tty->link. The fix here is to count both tty and tty->link and divide the count by 2 for user. And to have ->remove called, let's make tty_driver_remove_tty() global and call that from pty_unix98_shutdown() (tty_operations->shutdown). While at it, let's document that when ->shutdown is defined, tty_shutdown() is not called. Signed-off-by: Jiri Slaby Cc: Alan Cox Cc: "H. Peter Anvin" Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/pty.c | 17 +++++++++++++++-- drivers/tty/tty_io.c | 3 +-- include/linux/tty.h | 2 ++ include/linux/tty_driver.h | 3 +++ 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 98b6e3b..e809e9d 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -446,8 +446,19 @@ static inline void legacy_pty_init(void) { } int pty_limit = NR_UNIX98_PTY_DEFAULT; static int pty_limit_min; static int pty_limit_max = NR_UNIX98_PTY_MAX; +static int tty_count; static int pty_count; +static inline void pty_inc_count(void) +{ + pty_count = (++tty_count) / 2; +} + +static inline void pty_dec_count(void) +{ + pty_count = (--tty_count) / 2; +} + static struct cdev ptmx_cdev; static struct ctl_table pty_table[] = { @@ -542,6 +553,7 @@ static struct tty_struct *pts_unix98_lookup(struct tty_driver *driver, static void pty_unix98_shutdown(struct tty_struct *tty) { + tty_driver_remove_tty(tty->driver, tty); /* We have our own method as we don't use the tty index */ kfree(tty->termios); } @@ -588,7 +600,8 @@ static int pty_unix98_install(struct tty_driver *driver, struct tty_struct *tty) */ tty_driver_kref_get(driver); tty->count++; - pty_count++; + pty_inc_count(); /* tty */ + pty_inc_count(); /* tty->link */ return 0; err_free_mem: deinitialize_tty_struct(o_tty); @@ -602,7 +615,7 @@ err_free_tty: static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty) { - pty_count--; + pty_dec_count(); } static const struct tty_operations ptm_unix98_ops = { diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 150e4f7..4f1fc81 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1295,8 +1295,7 @@ static int tty_driver_install_tty(struct tty_driver *driver, * * Locking: tty_mutex for now */ -static void tty_driver_remove_tty(struct tty_driver *driver, - struct tty_struct *tty) +void tty_driver_remove_tty(struct tty_driver *driver, struct tty_struct *tty) { if (driver->ops->remove) driver->ops->remove(driver, tty); diff --git a/include/linux/tty.h b/include/linux/tty.h index 44bc0c5..5f2ede8 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -421,6 +421,8 @@ extern void tty_driver_flush_buffer(struct tty_struct *tty); extern void tty_throttle(struct tty_struct *tty); extern void tty_unthrottle(struct tty_struct *tty); extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws); +extern void tty_driver_remove_tty(struct tty_driver *driver, + struct tty_struct *tty); extern void tty_shutdown(struct tty_struct *tty); extern void tty_free_termios(struct tty_struct *tty); extern int is_current_pgrp_orphaned(void); diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 9deeac8..ecdaeb9 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -47,6 +47,9 @@ * * This routine is called synchronously when a particular tty device * is closed for the last time freeing up the resources. + * Note that tty_shutdown() is not called if ops->shutdown is defined. + * This means one is responsible to take care of calling ops->remove (e.g. + * via tty_driver_remove_tty) and releasing tty->termios. * * * void (*cleanup)(struct tty_struct * tty); -- cgit v1.1 From 0055197e984e5fbe6f48f37fc50dd30254915493 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 17 Aug 2011 13:48:15 +0200 Subject: TTY: serial, document ignoring of uart->ops->startup error When a user has SYS_ADMIN capabilities and uart->ops->startup returns an error in uart_startup, we silently drop the error. We then return 0 and behave as if it didn't fail. (Not quite, since we set TTY_IO_ERROR bit and leave ASYNC_INITIALIZED bit cleared.) This all is to allow setserial to work with improperly configured or unconfigured ports. User can thus set port properties and reconfigure properly. This patch only documents this behavior. Signed-off-by: Jiri Slaby Cc: Alan Cox Cc: Russel King Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index db7912c..a3efbea 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -200,6 +200,11 @@ static int uart_startup(struct tty_struct *tty, struct uart_state *state, int in clear_bit(TTY_IO_ERROR, &tty->flags); } + /* + * This is to allow setserial on this port. People may want to set + * port/irq/type and then reconfigure the port properly if it failed + * now. + */ if (retval && capable(CAP_SYS_ADMIN)) retval = 0; -- cgit v1.1 From 69dd3d8e29e294caaf63eb5e8a72d250279f9e5f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 23 Aug 2011 10:36:51 -0700 Subject: Revert "irq: Always set IRQF_ONESHOT if no primary handler is specified" This reverts commit f3637a5f2e2eb391ff5757bc83fb5de8f9726464. It turns out that this breaks several drivers, one example being OMAP boards which use the on-board OMAP UARTs and the omap-serial driver that will not boot to userspace after the commit. Paul Walmsley reports that enabling CONFIG_DEBUG_SHIRQ reveals 'IRQ handler type mismatch' errors: IRQ handler type mismatch for IRQ 74 current handler: serial idle ... and the reason is that setting IRQF_ONESHOT will now result in those interrupt handlers having different IRQF flags, and thus being unsharable. So the commit log in the reverted commit: "Since it is required for those users and there is no difference for others it makes sense to add this flag unconditionally." is simply not true: there may not be any difference from a "actions at irq time", but there is a *big* difference wrt this flag testing irq management (see __setup_irq() in kernel/irq/manage.c). One solution may be to stop verifying IRQF_ONESHOT in __setup_irq(), but right now the safe course of action is to revert the change. Let's revisit this in a later merge window. Reported-by: Paul Walmsley Cc: Sebastian Andrzej Siewior Requested-by: Alan Cox Acked-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- kernel/irq/manage.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 2e94258..9b956fa 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1331,7 +1331,6 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, if (!thread_fn) return -EINVAL; handler = irq_default_primary_handler; - irqflags |= IRQF_ONESHOT; } action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); -- cgit v1.1 From b280a97d1caf6fe1d38b51ebb31219391f5ad1a0 Mon Sep 17 00:00:00 2001 From: Nick Pelly Date: Fri, 15 Jul 2011 13:53:08 -0700 Subject: omap-serial: Allow IXON and IXOFF to be disabled. Fixes logic bug that software flow control cannot be disabled, because serial_omap_configure_xonxoff() is not called if both IXON and IXOFF bits are cleared. Signed-off-by: Nick Pelly Acked-by: Govindraj.R Tested-by: Govindraj.R Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/omap-serial.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index c37df8d..5e713d3 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -806,8 +806,7 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios, serial_omap_set_mctrl(&up->port, up->port.mctrl); /* Software Flow Control Configuration */ - if (termios->c_iflag & (IXON | IXOFF)) - serial_omap_configure_xonxoff(up, termios); + serial_omap_configure_xonxoff(up, termios); spin_unlock_irqrestore(&up->port.lock, flags); dev_dbg(up->port.dev, "serial_omap_set_termios+%d\n", up->pdev->id); -- cgit v1.1 From 4b723a471050a8b80f7fa86e76f01f4c711b3443 Mon Sep 17 00:00:00 2001 From: srinidhi kasagar Date: Tue, 9 Aug 2011 20:17:22 +0200 Subject: i2c-nomadik: Do not use _interruptible_ variant call If there is a signal pending and wait_for_completion_interruptible_timeout exited because of the -ERESTARTSYS error we are unable to send any more i2c messages. So, deprecate this _interruptible_ variant call. Signed-off-by: Srinidhi Kasagar Signed-off-by: Linus Walleij Signed-off-by: Ben Dooks --- drivers/i2c/busses/i2c-nomadik.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c index 0c731ca..f9b8854 100644 --- a/drivers/i2c/busses/i2c-nomadik.c +++ b/drivers/i2c/busses/i2c-nomadik.c @@ -417,12 +417,12 @@ static int read_i2c(struct nmk_i2c_dev *dev) writel(readl(dev->virtbase + I2C_IMSCR) | irq_mask, dev->virtbase + I2C_IMSCR); - timeout = wait_for_completion_interruptible_timeout( + timeout = wait_for_completion_timeout( &dev->xfer_complete, dev->adap.timeout); if (timeout < 0) { dev_err(&dev->pdev->dev, - "wait_for_completion_interruptible_timeout" + "wait_for_completion_timeout" "returned %d waiting for event\n", timeout); status = timeout; } @@ -504,12 +504,12 @@ static int write_i2c(struct nmk_i2c_dev *dev) writel(readl(dev->virtbase + I2C_IMSCR) | irq_mask, dev->virtbase + I2C_IMSCR); - timeout = wait_for_completion_interruptible_timeout( + timeout = wait_for_completion_timeout( &dev->xfer_complete, dev->adap.timeout); if (timeout < 0) { dev_err(&dev->pdev->dev, - "wait_for_completion_interruptible_timeout" + "wait_for_completion_timeout" "returned %d waiting for event\n", timeout); status = timeout; } -- cgit v1.1 From 584b408d37af4e0b38ad5b60f236381bcdf396bc Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Thu, 4 Aug 2011 07:53:02 -0700 Subject: Revert "i2c-omap: fix static suspend vs. runtime suspend" This reverts commit adf6e07922255937c8bfeea777d19502b4c9a2be. Remove system PM methods which can race with runtime PM methods. Also, as of v3.1, the PM domain level code for OMAP handles device power state transistions automatically for devices, so drivers no longer need to specifically call the bus/pm_domain methods themselves. Signed-off-by: Kevin Hilman Signed-off-by: Ben Dooks --- drivers/i2c/busses/i2c-omap.c | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 1a766cf..2dfb631 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1139,41 +1139,12 @@ omap_i2c_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_SUSPEND -static int omap_i2c_suspend(struct device *dev) -{ - if (!pm_runtime_suspended(dev)) - if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend) - dev->bus->pm->runtime_suspend(dev); - - return 0; -} - -static int omap_i2c_resume(struct device *dev) -{ - if (!pm_runtime_suspended(dev)) - if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume) - dev->bus->pm->runtime_resume(dev); - - return 0; -} - -static struct dev_pm_ops omap_i2c_pm_ops = { - .suspend = omap_i2c_suspend, - .resume = omap_i2c_resume, -}; -#define OMAP_I2C_PM_OPS (&omap_i2c_pm_ops) -#else -#define OMAP_I2C_PM_OPS NULL -#endif - static struct platform_driver omap_i2c_driver = { .probe = omap_i2c_probe, .remove = omap_i2c_remove, .driver = { .name = "omap_i2c", .owner = THIS_MODULE, - .pm = OMAP_I2C_PM_OPS, }, }; -- cgit v1.1 From 80900d0140a7648587982c8f299830e900e49165 Mon Sep 17 00:00:00 2001 From: Ido Yariv Date: Mon, 22 Aug 2011 23:19:48 +0300 Subject: wl12xx: Remove obsolete testmode NVS push command The testmode NVS push command is no longer in use. In addition, it has several implementation issues that prevent it from working correctly: 1. wl1271_tm_cmd_configure relies on wl->chip.id being set. However, since the device was not necessarily booted by the time the function is called, wl->chip.id will be initialized to 0. 2. The NVS file is fetched by calling request_firmware() before it is possible to push an NVS file. 3. The maximum allowed size of nl binary payloads is not sufficient for pushing NVS files. 4. Pushing 128x NVS files will always fail due to a bug in the validation code. 5. In case the pushed NVS file is found invalid, the mutex will be kept locked and the nvs member will become a dangling pointer. Since this feature is not being used, remove it completely instead of fixing it. Signed-off-by: Ido Yariv Acked-by: Luciano Coelho Signed-off-by: John W. Linville --- drivers/net/wireless/wl12xx/testmode.c | 45 ---------------------------------- 1 file changed, 45 deletions(-) diff --git a/drivers/net/wireless/wl12xx/testmode.c b/drivers/net/wireless/wl12xx/testmode.c index 88add68..4ae8eff 100644 --- a/drivers/net/wireless/wl12xx/testmode.c +++ b/drivers/net/wireless/wl12xx/testmode.c @@ -36,7 +36,6 @@ enum wl1271_tm_commands { WL1271_TM_CMD_TEST, WL1271_TM_CMD_INTERROGATE, WL1271_TM_CMD_CONFIGURE, - WL1271_TM_CMD_NVS_PUSH, WL1271_TM_CMD_SET_PLT_MODE, WL1271_TM_CMD_RECOVER, @@ -190,48 +189,6 @@ static int wl1271_tm_cmd_configure(struct wl1271 *wl, struct nlattr *tb[]) return 0; } -static int wl1271_tm_cmd_nvs_push(struct wl1271 *wl, struct nlattr *tb[]) -{ - int ret = 0; - size_t len; - void *buf; - - wl1271_debug(DEBUG_TESTMODE, "testmode cmd nvs push"); - - if (!tb[WL1271_TM_ATTR_DATA]) - return -EINVAL; - - buf = nla_data(tb[WL1271_TM_ATTR_DATA]); - len = nla_len(tb[WL1271_TM_ATTR_DATA]); - - mutex_lock(&wl->mutex); - - kfree(wl->nvs); - - if ((wl->chip.id == CHIP_ID_1283_PG20) && - (len != sizeof(struct wl128x_nvs_file))) - return -EINVAL; - else if (len != sizeof(struct wl1271_nvs_file)) - return -EINVAL; - - wl->nvs = kzalloc(len, GFP_KERNEL); - if (!wl->nvs) { - wl1271_error("could not allocate memory for the nvs file"); - ret = -ENOMEM; - goto out; - } - - memcpy(wl->nvs, buf, len); - wl->nvs_len = len; - - wl1271_debug(DEBUG_TESTMODE, "testmode pushed nvs"); - -out: - mutex_unlock(&wl->mutex); - - return ret; -} - static int wl1271_tm_cmd_set_plt_mode(struct wl1271 *wl, struct nlattr *tb[]) { u32 val; @@ -288,8 +245,6 @@ int wl1271_tm_cmd(struct ieee80211_hw *hw, void *data, int len) return wl1271_tm_cmd_interrogate(wl, tb); case WL1271_TM_CMD_CONFIGURE: return wl1271_tm_cmd_configure(wl, tb); - case WL1271_TM_CMD_NVS_PUSH: - return wl1271_tm_cmd_nvs_push(wl, tb); case WL1271_TM_CMD_SET_PLT_MODE: return wl1271_tm_cmd_set_plt_mode(wl, tb); case WL1271_TM_CMD_RECOVER: -- cgit v1.1 From a15f1c45f393982196c981a8df8b534cc9f3bb80 Mon Sep 17 00:00:00 2001 From: Ido Yariv Date: Mon, 22 Aug 2011 23:19:49 +0300 Subject: wl12xx: Fix validation of pm_runtime_get_sync return value wl1271_sdio_power_on checks if the return value of pm_runtime_get_sync is non-zero, and if so bails out. However, pm_runtime_get_sync can return a positive number which does not suggest an error has occurred. This is problematic for two reasons: 1. The function will needlessly bail out without decrementing back the runtime PM reference counter. 2. wl1271_power_on only checks if wl1271_power_on return value is negative. This means that wl1271_power_on will continue even if wl1271_sdio_power_on bailed out. As a result, sdio transactions will be initiated without properly enabling the sdio function and claiming the host. This could even lead to a kernel panic. Fix this by only checking that the return value of pm_runtime_get_sync is non-negative. Signed-off-by: Ido Yariv Acked-by: Luciano Coelho Signed-off-by: John W. Linville --- drivers/net/wireless/wl12xx/sdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/wl12xx/sdio.c b/drivers/net/wireless/wl12xx/sdio.c index 5cf18c2..fb1fd5a 100644 --- a/drivers/net/wireless/wl12xx/sdio.c +++ b/drivers/net/wireless/wl12xx/sdio.c @@ -164,7 +164,7 @@ static int wl1271_sdio_power_on(struct wl1271 *wl) /* If enabled, tell runtime PM not to power off the card */ if (pm_runtime_enabled(&func->dev)) { ret = pm_runtime_get_sync(&func->dev); - if (ret) + if (ret < 0) goto out; } else { /* Runtime PM is disabled: power up the card manually */ -- cgit v1.1 From 7a5e4877c14de0827dbda8efa5080089757a8733 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Tue, 23 Aug 2011 11:42:25 +0300 Subject: wl12xx: add max_sched_scan_ssids value to the hw description After commit 5a865ba, we require a separate value to indicate the number of supported SSIDs in scheduled scans. This patch adds a proper value to the wl12xx driver. This fixes a regression in 3.1-rc3 where scheduled scans were not working properly with the wl12xx driver. Signed-off-by: Luciano Coelho Signed-off-by: John W. Linville --- drivers/net/wireless/wl12xx/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/wl12xx/main.c b/drivers/net/wireless/wl12xx/main.c index e58c22d..b70ae40 100644 --- a/drivers/net/wireless/wl12xx/main.c +++ b/drivers/net/wireless/wl12xx/main.c @@ -4283,6 +4283,7 @@ int wl1271_init_ieee80211(struct wl1271 *wl) wl->hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_AP); wl->hw->wiphy->max_scan_ssids = 1; + wl->hw->wiphy->max_sched_scan_ssids = 1; /* * Maximum length of elements in scanning probe request templates * should be the maximum length possible for a template, without -- cgit v1.1 From 9818a4775a3ab18b84a689537088b3d72a742130 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 8 Aug 2011 15:57:45 +0200 Subject: staging: brcm80211: fix compile error on non-x86 archs since 3.0 kernel Since the arrival of kernel version 3.0 in the staging tree it turns out compile error occurs for sparc64, powerpc, and arm platforms. This patch fixes that issue. Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Henry Ptasinski Signed-off-by: Arend van Spriel Signed-off-by: Greg Kroah-Hartman --- drivers/staging/brcm80211/brcmsmac/types.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/brcm80211/brcmsmac/types.h b/drivers/staging/brcm80211/brcmsmac/types.h index bbf2189..823b5e4 100644 --- a/drivers/staging/brcm80211/brcmsmac/types.h +++ b/drivers/staging/brcm80211/brcmsmac/types.h @@ -18,6 +18,7 @@ #define _BRCM_TYPES_H_ #include +#include /* Bus types */ #define SI_BUS 0 /* SOC Interconnect */ -- cgit v1.1 From 20cc7995fe66ce6417678bb0db6b3d4955fb1ff6 Mon Sep 17 00:00:00 2001 From: Pieter-Paul Giesberts Date: Mon, 8 Aug 2011 15:59:03 +0200 Subject: staging: brcm80211: SPARC build error fix Due to missing memset function declaration. Reviewed-by: Roland Vossen Signed-off-by: Arend van Spriel Signed-off-by: Greg Kroah-Hartman --- drivers/staging/brcm80211/brcmsmac/otp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/brcm80211/brcmsmac/otp.c b/drivers/staging/brcm80211/brcmsmac/otp.c index 34253cf..4a70180 100644 --- a/drivers/staging/brcm80211/brcmsmac/otp.c +++ b/drivers/staging/brcm80211/brcmsmac/otp.c @@ -16,6 +16,7 @@ #include #include +#include #include #include -- cgit v1.1 From ba8f318471f66d5d5b79da68112525cf432b2b18 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 18 Aug 2011 09:37:02 +0100 Subject: m68k: fix __page_to_pfn for a const struct page argument Fixes fallout due to the removal of the cast in commit aa462abe8aaf ("mm: fix __page_to_pfn for a const struct page argument") Signed-off-by: Ian Campbell Cc: Andrew Morton Acked-by: Geert Uytterhoeven Cc: linux-m68k@lists.linux-m68k.org Signed-off-by: Linus Torvalds --- arch/m68k/include/asm/page_mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h index 31d5570..89f2014 100644 --- a/arch/m68k/include/asm/page_mm.h +++ b/arch/m68k/include/asm/page_mm.h @@ -162,7 +162,7 @@ static inline __attribute_const__ int __virt_to_node_shift(void) pgdat->node_mem_map + (__pfn - pgdat->node_start_pfn); \ }) #define page_to_pfn(_page) ({ \ - struct page *__p = (_page); \ + const struct page *__p = (_page); \ struct pglist_data *pgdat; \ pgdat = &pg_data_map[page_to_nid(__p)]; \ ((__p) - pgdat->node_mem_map) + pgdat->node_start_pfn; \ -- cgit v1.1 From c5f5c4db393837ebb2ae47bf061d70e498f48f8c Mon Sep 17 00:00:00 2001 From: Seth Jennings Date: Wed, 10 Aug 2011 12:56:49 -0500 Subject: staging: zcache: fix crash on high memory swap zcache_put_page() was modified to pass page_address(page) instead of the actual page structure. In combination with the function signature changes to tmem_put() and zcache_pampd_create(), zcache_pampd_create() tries to (re)derive the page structure from the virtual address. However, if the original page is a high memory page (or any unmapped page), this virt_to_page() fails because the page_address() in zcache_put_page() returned NULL. This patch changes zcache_put_page() and zcache_get_page() to pass the page structure instead of the page's virtual address, which may or may not exist. Signed-off-by: Seth Jennings Acked-by: Dan Magenheimer Signed-off-by: Greg Kroah-Hartman --- drivers/staging/zcache/zcache-main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c index 855a5bb..a3f5162 100644 --- a/drivers/staging/zcache/zcache-main.c +++ b/drivers/staging/zcache/zcache-main.c @@ -1158,7 +1158,7 @@ static void *zcache_pampd_create(char *data, size_t size, bool raw, int eph, size_t clen; int ret; unsigned long count; - struct page *page = virt_to_page(data); + struct page *page = (struct page *)(data); struct zcache_client *cli = pool->client; uint16_t client_id = get_client_id_from_client(cli); unsigned long zv_mean_zsize; @@ -1227,7 +1227,7 @@ static int zcache_pampd_get_data(char *data, size_t *bufsize, bool raw, int ret = 0; BUG_ON(is_ephemeral(pool)); - zv_decompress(virt_to_page(data), pampd); + zv_decompress((struct page *)(data), pampd); return ret; } @@ -1539,7 +1539,7 @@ static int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp, goto out; if (!zcache_freeze && zcache_do_preload(pool) == 0) { /* preload does preempt_disable on success */ - ret = tmem_put(pool, oidp, index, page_address(page), + ret = tmem_put(pool, oidp, index, (char *)(page), PAGE_SIZE, 0, is_ephemeral(pool)); if (ret < 0) { if (is_ephemeral(pool)) @@ -1572,7 +1572,7 @@ static int zcache_get_page(int cli_id, int pool_id, struct tmem_oid *oidp, pool = zcache_get_pool_by_id(cli_id, pool_id); if (likely(pool != NULL)) { if (atomic_read(&pool->obj_count) > 0) - ret = tmem_get(pool, oidp, index, page_address(page), + ret = tmem_get(pool, oidp, index, (char *)(page), &size, 0, is_ephemeral(pool)); zcache_put_pool(pool); } -- cgit v1.1 From 1dcab0875b113a148b6601d87b4e0e3444440339 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 9 Aug 2011 21:01:33 +0300 Subject: Staging: zcache: signedness bug in tmem_get() "ret" needs to be signed for the error handling to work properly. Signed-off-by: Dan Carpenter Acked-by: Dan Magenheimer Signed-off-by: Greg Kroah-Hartman --- drivers/staging/zcache/tmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/zcache/tmem.c b/drivers/staging/zcache/tmem.c index 975e34b..1ca66ea 100644 --- a/drivers/staging/zcache/tmem.c +++ b/drivers/staging/zcache/tmem.c @@ -604,7 +604,7 @@ int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index, struct tmem_obj *obj; void *pampd; bool ephemeral = is_ephemeral(pool); - uint32_t ret = -1; + int ret = -1; struct tmem_hashbucket *hb; bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral); bool lock_held = false; -- cgit v1.1 From 048316be72893455f69ad728fa94c26e2e582ba2 Mon Sep 17 00:00:00 2001 From: David Daney Date: Tue, 16 Aug 2011 10:10:56 -0700 Subject: staging: octeon-ethernet: Add missing #includes. I looks like something used to implicitly include linux/interrupt.h, and no longer does. Fix the resulting build error by explicitly including it. Signed-off-by: David Daney Cc: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/staging/octeon/ethernet-rgmii.c | 1 + drivers/staging/octeon/ethernet-spi.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/staging/octeon/ethernet-rgmii.c b/drivers/staging/octeon/ethernet-rgmii.c index 9c0d293..c3d73f8 100644 --- a/drivers/staging/octeon/ethernet-rgmii.c +++ b/drivers/staging/octeon/ethernet-rgmii.c @@ -26,6 +26,7 @@ **********************************************************************/ #include #include +#include #include #include #include diff --git a/drivers/staging/octeon/ethernet-spi.c b/drivers/staging/octeon/ethernet-spi.c index 9708254..d0e2d51 100644 --- a/drivers/staging/octeon/ethernet-spi.c +++ b/drivers/staging/octeon/ethernet-spi.c @@ -26,6 +26,7 @@ **********************************************************************/ #include #include +#include #include #include -- cgit v1.1 From 7ca0758cdb7c241cb4e0490a8d95f0eb5b861daf Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 22 Aug 2011 13:27:06 -0700 Subject: x86-32, vdso: On system call restart after SYSENTER, use int $0x80 When we enter a 32-bit system call via SYSENTER or SYSCALL, we shuffle the arguments to match the int $0x80 calling convention. This was probably a design mistake, but it's what it is now. This causes errors if the system call as to be restarted. For SYSENTER, we have to invoke the instruction from the vdso as the return address is hardcoded. Accordingly, we can simply replace the jump in the vdso with an int $0x80 instruction and use the slower entry point for a post-restart. Suggested-by: Linus Torvalds Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/CA%2B55aFztZ=r5wa0x26KJQxvZOaQq8s2v3u50wCyJcA-Sc4g8gQ@mail.gmail.com Cc: --- arch/x86/vdso/vdso32/sysenter.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/vdso/vdso32/sysenter.S b/arch/x86/vdso/vdso32/sysenter.S index e2800af..e354bce 100644 --- a/arch/x86/vdso/vdso32/sysenter.S +++ b/arch/x86/vdso/vdso32/sysenter.S @@ -43,7 +43,7 @@ __kernel_vsyscall: .space 7,0x90 /* 14: System call restart point is here! (SYSENTER_RETURN-2) */ - jmp .Lenter_kernel + int $0x80 /* 16: System call normal return point is here! */ VDSO32_SYSENTER_RETURN: /* Symbol used by sysenter.c via vdso32-syms.h */ pop %ebp -- cgit v1.1 From 1a878284473284f9577d44babf16d87152a05c33 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 29 Jul 2011 17:16:40 -0700 Subject: [SCSI] isci: fix sata response handling A bug (likely copy/paste) that has been carried from the original implementation. The unsolicited frame handling structure returns the d2h fis in the isci_request.stp.rsp buffer. Cc: Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/isci/request.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index a46e07a..b4cf998 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -2399,22 +2399,19 @@ static void isci_task_save_for_upper_layer_completion( } } -static void isci_request_process_stp_response(struct sas_task *task, - void *response_buffer) +static void isci_process_stp_response(struct sas_task *task, struct dev_to_host_fis *fis) { - struct dev_to_host_fis *d2h_reg_fis = response_buffer; struct task_status_struct *ts = &task->task_status; struct ata_task_resp *resp = (void *)&ts->buf[0]; - resp->frame_len = le16_to_cpu(*(__le16 *)(response_buffer + 6)); - memcpy(&resp->ending_fis[0], response_buffer + 16, 24); + resp->frame_len = sizeof(*fis); + memcpy(resp->ending_fis, fis, sizeof(*fis)); ts->buf_valid_size = sizeof(*resp); - /** - * If the device fault bit is set in the status register, then + /* If the device fault bit is set in the status register, then * set the sense data and return. */ - if (d2h_reg_fis->status & ATA_DF) + if (fis->status & ATA_DF) ts->stat = SAS_PROTO_RESPONSE; else ts->stat = SAM_STAT_GOOD; @@ -2428,7 +2425,6 @@ static void isci_request_io_request_complete(struct isci_host *ihost, { struct sas_task *task = isci_request_access_task(request); struct ssp_response_iu *resp_iu; - void *resp_buf; unsigned long task_flags; struct isci_remote_device *idev = isci_lookup_device(task->dev); enum service_response response = SAS_TASK_UNDELIVERED; @@ -2565,9 +2561,7 @@ static void isci_request_io_request_complete(struct isci_host *ihost, task); if (sas_protocol_ata(task->task_proto)) { - resp_buf = &request->stp.rsp; - isci_request_process_stp_response(task, - resp_buf); + isci_process_stp_response(task, &request->stp.rsp); } else if (SAS_PROTOCOL_SSP == task->task_proto) { /* crack the iu response buffer. */ -- cgit v1.1 From ee33e2b771f9e9e4aaba2bb2ace7b727fe451a8b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 29 Jul 2011 17:16:45 -0700 Subject: [SCSI] isci: fix 32-bit operation when CONFIG_HIGHMEM64G=n The unsolicited frame control infrastructure requires a table of dma addresses for the hardware to lookup the frame buffer location by an index. The hardware expects the elements of this table to be 64-bit quantities, so we cannot reference these elements as dma_addr_t. All unsolicited frame protocols are affected, particularly SATA-PIO and SMP which prevented direct-attached SATA drives and expander-attached drives to not be discovered. Cc: Reported-by: Jacek Danecki Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/isci/unsolicited_frame_control.c | 2 +- drivers/scsi/isci/unsolicited_frame_control.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/isci/unsolicited_frame_control.c b/drivers/scsi/isci/unsolicited_frame_control.c index e9e1e2a..16f88ab 100644 --- a/drivers/scsi/isci/unsolicited_frame_control.c +++ b/drivers/scsi/isci/unsolicited_frame_control.c @@ -72,7 +72,7 @@ int sci_unsolicited_frame_control_construct(struct isci_host *ihost) */ buf_len = SCU_MAX_UNSOLICITED_FRAMES * SCU_UNSOLICITED_FRAME_BUFFER_SIZE; header_len = SCU_MAX_UNSOLICITED_FRAMES * sizeof(struct scu_unsolicited_frame_header); - size = buf_len + header_len + SCU_MAX_UNSOLICITED_FRAMES * sizeof(dma_addr_t); + size = buf_len + header_len + SCU_MAX_UNSOLICITED_FRAMES * sizeof(uf_control->address_table.array[0]); /* * The Unsolicited Frame buffers are set at the start of the UF diff --git a/drivers/scsi/isci/unsolicited_frame_control.h b/drivers/scsi/isci/unsolicited_frame_control.h index 31cb950..75d8966 100644 --- a/drivers/scsi/isci/unsolicited_frame_control.h +++ b/drivers/scsi/isci/unsolicited_frame_control.h @@ -214,7 +214,7 @@ struct sci_uf_address_table_array { * starting address of the UF address table. * 64-bit pointers are required by the hardware. */ - dma_addr_t *array; + u64 *array; /** * This field specifies the physical address location for the UF -- cgit v1.1 From 985af6f70dbb8a33b3af8a7c7df508d924650e37 Mon Sep 17 00:00:00 2001 From: Marcin Tomczak Date: Fri, 29 Jul 2011 17:16:50 -0700 Subject: [SCSI] isci: change sas phy timeouts from 54us to 59us Need the following workaround in the driver for interoperability with the older Intel SSD drives and any other SATA drive that may exhibit the same behavior. This is a corner case where SCU speed is limited to either 3G or 1.5G and the drive has a period of DC idle when it switches speed during SATA speed negotiation. Workaround :change PHYTOV[31:24] from 0x36 to 0x3B. Signed-off-by: Marcin Tomczak Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/isci/phy.c | 13 +++++++++++++ drivers/scsi/isci/registers.h | 12 ++++++++++++ 2 files changed, 25 insertions(+) diff --git a/drivers/scsi/isci/phy.c b/drivers/scsi/isci/phy.c index 79313a7..430fc8f 100644 --- a/drivers/scsi/isci/phy.c +++ b/drivers/scsi/isci/phy.c @@ -104,6 +104,7 @@ sci_phy_link_layer_initialization(struct isci_phy *iphy, u32 parity_count = 0; u32 llctl, link_rate; u32 clksm_value = 0; + u32 sp_timeouts = 0; iphy->link_layer_registers = reg; @@ -211,6 +212,18 @@ sci_phy_link_layer_initialization(struct isci_phy *iphy, llctl |= SCU_SAS_LLCTL_GEN_VAL(MAX_LINK_RATE, link_rate); writel(llctl, &iphy->link_layer_registers->link_layer_control); + sp_timeouts = readl(&iphy->link_layer_registers->sas_phy_timeouts); + + /* Clear the default 0x36 (54us) RATE_CHANGE timeout value. */ + sp_timeouts &= ~SCU_SAS_PHYTOV_GEN_VAL(RATE_CHANGE, 0xFF); + + /* Set RATE_CHANGE timeout value to 0x3B (59us). This ensures SCU can + * lock with 3Gb drive when SCU max rate is set to 1.5Gb. + */ + sp_timeouts |= SCU_SAS_PHYTOV_GEN_VAL(RATE_CHANGE, 0x3B); + + writel(sp_timeouts, &iphy->link_layer_registers->sas_phy_timeouts); + if (is_a2(ihost->pdev)) { /* Program the max ARB time for the PHY to 700us so we inter-operate with * the PMC expander which shuts down PHYs if the expander PHY generates too diff --git a/drivers/scsi/isci/registers.h b/drivers/scsi/isci/registers.h index 9b266c7..00afc73 100644 --- a/drivers/scsi/isci/registers.h +++ b/drivers/scsi/isci/registers.h @@ -1299,6 +1299,18 @@ struct scu_transport_layer_registers { #define SCU_AFE_XCVRCR_OFFSET 0x00DC #define SCU_AFE_LUTCR_OFFSET 0x00E0 +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_ALIGN_DETECTION_SHIFT (0UL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_ALIGN_DETECTION_MASK (0x000000FFUL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_HOT_PLUG_SHIFT (8UL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_HOT_PLUG_MASK (0x0000FF00UL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_COMSAS_DETECTION_SHIFT (16UL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_COMSAS_DETECTION_MASK (0x00FF0000UL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_RATE_CHANGE_SHIFT (24UL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_RATE_CHANGE_MASK (0xFF000000UL) + +#define SCU_SAS_PHYTOV_GEN_VAL(name, value) \ + SCU_GEN_VALUE(SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_##name, value) + #define SCU_SAS_LINK_LAYER_CONTROL_MAX_LINK_RATE_SHIFT (0) #define SCU_SAS_LINK_LAYER_CONTROL_MAX_LINK_RATE_MASK (0x00000003) #define SCU_SAS_LINK_LAYER_CONTROL_MAX_LINK_RATE_GEN1 (0) -- cgit v1.1 From 4ac13e177904280a2502c27029a72e3fd2957cde Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Jul 2011 17:16:55 -0700 Subject: [SCSI] isci: Update MAINTAINERS entry for the isci driver Signed-off-by: Dave Jiang Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- MAINTAINERS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 069ee3b..4fe6854 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3262,6 +3262,17 @@ F: Documentation/input/multi-touch-protocol.txt F: drivers/input/input-mt.c K: \b(ABS|SYN)_MT_ +INTEL C600 SERIES SAS CONTROLLER DRIVER +M: Intel SCU Linux support +M: Dan Williams +M: Dave Jiang +M: Ed Nadolski +L: linux-scsi@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git +S: Maintained +F: drivers/scsi/isci/ +F: firmware/isci/ + INTEL IDLE DRIVER M: Len Brown L: linux-pm@lists.linux-foundation.org -- cgit v1.1 From 3a7bda830fad427768ed71c0ebf3448849c006b5 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Jul 2011 17:17:00 -0700 Subject: [SCSI] isci: Adding documentation to API change and fixup sysfs registration Adding API update for adding isci_id entry scsi_host sysfs entry. Also fixing up the sysfs registration to the scsi_host template Signed-off-by: Dave Jiang Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- Documentation/ABI/testing/sysfs-class-scsi_host | 13 +++++++++ drivers/scsi/isci/init.c | 36 ++++++++++++------------- 2 files changed, 31 insertions(+), 18 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-class-scsi_host diff --git a/Documentation/ABI/testing/sysfs-class-scsi_host b/Documentation/ABI/testing/sysfs-class-scsi_host new file mode 100644 index 0000000..29a4f89 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-scsi_host @@ -0,0 +1,13 @@ +What: /sys/class/scsi_host/hostX/isci_id +Date: June 2011 +Contact: Dave Jiang +Description: + This file contains the enumerated host ID for the Intel + SCU controller. The Intel(R) C600 Series Chipset SATA/SAS + Storage Control Unit embeds up to two 4-port controllers in + a single PCI device. The controllers are enumerated in order + which usually means the lowest number scsi_host corresponds + with the first controller, but this association is not + guaranteed. The 'isci_id' attribute unambiguously identifies + the controller index: '0' for the first controller, + '1' for the second. diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c index 61e0d09..e78320b 100644 --- a/drivers/scsi/isci/init.c +++ b/drivers/scsi/isci/init.c @@ -59,6 +59,7 @@ #include #include #include +#include #include "isci.h" #include "task.h" #include "probe_roms.h" @@ -113,6 +114,22 @@ unsigned char max_concurr_spinup = 1; module_param(max_concurr_spinup, byte, 0); MODULE_PARM_DESC(max_concurr_spinup, "Max concurrent device spinup"); +static ssize_t isci_show_id(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = container_of(dev, typeof(*shost), shost_dev); + struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost); + struct isci_host *ihost = container_of(sas_ha, typeof(*ihost), sas_ha); + + return snprintf(buf, PAGE_SIZE, "%d\n", ihost->id); +} + +static DEVICE_ATTR(isci_id, S_IRUGO, isci_show_id, NULL); + +struct device_attribute *isci_host_attrs[] = { + &dev_attr_isci_id, + NULL +}; + static struct scsi_host_template isci_sht = { .module = THIS_MODULE, @@ -138,6 +155,7 @@ static struct scsi_host_template isci_sht = { .slave_alloc = sas_slave_alloc, .target_destroy = sas_target_destroy, .ioctl = sas_ioctl, + .shost_attrs = isci_host_attrs, }; static struct sas_domain_function_template isci_transport_ops = { @@ -232,17 +250,6 @@ static int isci_register_sas_ha(struct isci_host *isci_host) return 0; } -static ssize_t isci_show_id(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct Scsi_Host *shost = container_of(dev, typeof(*shost), shost_dev); - struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost); - struct isci_host *ihost = container_of(sas_ha, typeof(*ihost), sas_ha); - - return snprintf(buf, PAGE_SIZE, "%d\n", ihost->id); -} - -static DEVICE_ATTR(isci_id, S_IRUGO, isci_show_id, NULL); - static void isci_unregister(struct isci_host *isci_host) { struct Scsi_Host *shost; @@ -251,7 +258,6 @@ static void isci_unregister(struct isci_host *isci_host) return; shost = isci_host->shost; - device_remove_file(&shost->shost_dev, &dev_attr_isci_id); sas_unregister_ha(&isci_host->sas_ha); @@ -415,14 +421,8 @@ static struct isci_host *isci_host_alloc(struct pci_dev *pdev, int id) if (err) goto err_shost_remove; - err = device_create_file(&shost->shost_dev, &dev_attr_isci_id); - if (err) - goto err_unregister_ha; - return isci_host; - err_unregister_ha: - sas_unregister_ha(&(isci_host->sas_ha)); err_shost_remove: scsi_remove_host(shost); err_shost: -- cgit v1.1 From 39ea2c5b5ffaa344467da53e885cfa4ac0105050 Mon Sep 17 00:00:00 2001 From: Jeff Skirvin Date: Fri, 29 Jul 2011 17:17:05 -0700 Subject: [SCSI] isci: Leave requests alone if already terminating. Instead of immediately completing any request that has a second termination call made on it, wait for the TC done/abort HW event. Signed-off-by: Jeff Skirvin Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/isci/request.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index b4cf998..b5d3a8c 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -732,12 +732,20 @@ sci_io_request_terminate(struct isci_request *ireq) sci_change_state(&ireq->sm, SCI_REQ_ABORTING); return SCI_SUCCESS; case SCI_REQ_TASK_WAIT_TC_RESP: + /* The task frame was already confirmed to have been + * sent by the SCU HW. Since the state machine is + * now only waiting for the task response itself, + * abort the request and complete it immediately + * and don't wait for the task response. + */ sci_change_state(&ireq->sm, SCI_REQ_ABORTING); sci_change_state(&ireq->sm, SCI_REQ_COMPLETED); return SCI_SUCCESS; case SCI_REQ_ABORTING: - sci_change_state(&ireq->sm, SCI_REQ_COMPLETED); - return SCI_SUCCESS; + /* If a request has a termination requested twice, return + * a failure indication, since HW confirmation of the first + * abort is still outstanding. + */ case SCI_REQ_COMPLETED: default: dev_warn(&ireq->owning_controller->pdev->dev, -- cgit v1.1 From 9b4be528999483d70a1ffc0accd102e477d5a503 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 29 Jul 2011 17:17:10 -0700 Subject: [SCSI] isci: dynamic interrupt coalescing Hardware allows both an outstanding number commands and a timeout value (whichever occurs first) as a gate to the next interrupt generation. This scheme at completion time looks at the remaining number of outstanding tasks and sets the timeout to maximize small transaction operation. If transactions are large (take more than a few 10s of microseconds to complete) then performance is not interrupt processing bound, so the small timeouts this scheme generates are overridden by the time it takes for a completion to arrive. Tested-by: Dave Jiang Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/isci/host.c | 10 +++++++++- drivers/scsi/isci/host.h | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/isci/host.c b/drivers/scsi/isci/host.c index 26072f1..2328f98 100644 --- a/drivers/scsi/isci/host.c +++ b/drivers/scsi/isci/host.c @@ -1091,6 +1091,7 @@ static void isci_host_completion_routine(unsigned long data) struct isci_request *request; struct isci_request *next_request; struct sas_task *task; + u16 active; INIT_LIST_HEAD(&completed_request_list); INIT_LIST_HEAD(&errored_request_list); @@ -1181,6 +1182,13 @@ static void isci_host_completion_routine(unsigned long data) } } + /* the coalesence timeout doubles at each encoding step, so + * update it based on the ilog2 value of the outstanding requests + */ + active = isci_tci_active(ihost); + writel(SMU_ICC_GEN_VAL(NUMBER, active) | + SMU_ICC_GEN_VAL(TIMER, ISCI_COALESCE_BASE + ilog2(active)), + &ihost->smu_registers->interrupt_coalesce_control); } /** @@ -1471,7 +1479,7 @@ static void sci_controller_ready_state_enter(struct sci_base_state_machine *sm) struct isci_host *ihost = container_of(sm, typeof(*ihost), sm); /* set the default interrupt coalescence number and timeout value. */ - sci_controller_set_interrupt_coalescence(ihost, 0x10, 250); + sci_controller_set_interrupt_coalescence(ihost, 0, 0); } static void sci_controller_ready_state_exit(struct sci_base_state_machine *sm) diff --git a/drivers/scsi/isci/host.h b/drivers/scsi/isci/host.h index 062101a..9f33831 100644 --- a/drivers/scsi/isci/host.h +++ b/drivers/scsi/isci/host.h @@ -369,6 +369,9 @@ static inline struct isci_host *dev_to_ihost(struct domain_device *dev) #define ISCI_TAG_SEQ(tag) (((tag) >> 12) & (SCI_MAX_SEQ-1)) #define ISCI_TAG_TCI(tag) ((tag) & (SCI_MAX_IO_REQUESTS-1)) +/* interrupt coalescing baseline: 9 == 3 to 5us interrupt delay per command */ +#define ISCI_COALESCE_BASE 9 + /* expander attached sata devices require 3 rnc slots */ static inline int sci_remote_device_node_count(struct isci_remote_device *idev) { -- cgit v1.1 From 77cd72a53f6426f81b7f56a862402849ee903bda Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 29 Jul 2011 17:17:16 -0700 Subject: [SCSI] isci: fix event-get pointer increment Hardware only increments the put pointer on event types >= 4. Do not increment the get pointer for event type 3. Reported-by: Kapil Karkra Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/isci/host.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/isci/host.c b/drivers/scsi/isci/host.c index 2328f98..6981b77 100644 --- a/drivers/scsi/isci/host.c +++ b/drivers/scsi/isci/host.c @@ -531,6 +531,9 @@ static void sci_controller_process_completions(struct isci_host *ihost) break; case SCU_COMPLETION_TYPE_EVENT: + sci_controller_event_completion(ihost, ent); + break; + case SCU_COMPLETION_TYPE_NOTIFY: { event_cycle ^= ((event_get+1) & SCU_MAX_EVENTS) << (SMU_COMPLETION_QUEUE_GET_EVENT_CYCLE_BIT_SHIFT - SCU_MAX_EVENTS_SHIFT); -- cgit v1.1 From 98e2a5a3a125608505783bdb95744997f76b3c30 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 29 Jul 2011 17:17:21 -0700 Subject: [SCSI] isci: add version number Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/isci/init.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c index e78320b..29aa34e 100644 --- a/drivers/scsi/isci/init.c +++ b/drivers/scsi/isci/init.c @@ -64,6 +64,14 @@ #include "task.h" #include "probe_roms.h" +#define MAJ 1 +#define MIN 0 +#define BUILD 0 +#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ + __stringify(BUILD) + +MODULE_VERSION(DRV_VERSION); + static struct scsi_transport_template *isci_transport_template; static DEFINE_PCI_DEVICE_TABLE(isci_id_table) = { @@ -540,7 +548,8 @@ static __init int isci_init(void) { int err; - pr_info("%s: Intel(R) C600 SAS Controller Driver\n", DRV_NAME); + pr_info("%s: Intel(R) C600 SAS Controller Driver - version %s\n", + DRV_NAME, DRV_VERSION); isci_transport_template = sas_domain_attach_transport(&isci_transport_ops); if (!isci_transport_template) -- cgit v1.1 From b4cb0d4da745bc1d806b9b4a27cc4ce1f7adbf99 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 23 Aug 2011 21:04:28 -0700 Subject: hwmon: (i5k_amb) Drop i5k_channel_pci_id Function i5k_channel_pci_id looks like it can fail, while a better code design would make it more obvious that it can't. We can even get rid of the function. Signed-off-by: Jean Delvare Acked-by: Darrick J. Wong Signed-off-by: Guenter Roeck --- drivers/hwmon/i5k_amb.c | 42 ++++++++++++++---------------------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/drivers/hwmon/i5k_amb.c b/drivers/hwmon/i5k_amb.c index c4c40be..d22f241 100644 --- a/drivers/hwmon/i5k_amb.c +++ b/drivers/hwmon/i5k_amb.c @@ -114,7 +114,6 @@ struct i5k_amb_data { void __iomem *amb_mmio; struct i5k_device_attribute *attrs; unsigned int num_attrs; - unsigned long chipset_id; }; static ssize_t show_name(struct device *dev, struct device_attribute *devattr, @@ -444,8 +443,6 @@ static int __devinit i5k_find_amb_registers(struct i5k_amb_data *data, goto out; } - data->chipset_id = devid; - res = 0; out: pci_dev_put(pcidev); @@ -478,23 +475,13 @@ out: return res; } -static unsigned long i5k_channel_pci_id(struct i5k_amb_data *data, - unsigned long channel) -{ - switch (data->chipset_id) { - case PCI_DEVICE_ID_INTEL_5000_ERR: - return PCI_DEVICE_ID_INTEL_5000_FBD0 + channel; - case PCI_DEVICE_ID_INTEL_5400_ERR: - return PCI_DEVICE_ID_INTEL_5400_FBD0 + channel; - default: - BUG(); - } -} - -static unsigned long chipset_ids[] = { - PCI_DEVICE_ID_INTEL_5000_ERR, - PCI_DEVICE_ID_INTEL_5400_ERR, - 0 +static struct { + unsigned long err; + unsigned long fbd0; +} chipset_ids[] __devinitdata = { + { PCI_DEVICE_ID_INTEL_5000_ERR, PCI_DEVICE_ID_INTEL_5000_FBD0 }, + { PCI_DEVICE_ID_INTEL_5400_ERR, PCI_DEVICE_ID_INTEL_5400_FBD0 }, + { 0, 0 } }; #ifdef MODULE @@ -510,8 +497,7 @@ static int __devinit i5k_amb_probe(struct platform_device *pdev) { struct i5k_amb_data *data; struct resource *reso; - int i; - int res = -ENODEV; + int i, res; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) @@ -520,22 +506,22 @@ static int __devinit i5k_amb_probe(struct platform_device *pdev) /* Figure out where the AMB registers live */ i = 0; do { - res = i5k_find_amb_registers(data, chipset_ids[i]); + res = i5k_find_amb_registers(data, chipset_ids[i].err); + if (res == 0) + break; i++; - } while (res && chipset_ids[i]); + } while (chipset_ids[i].err); if (res) goto err; /* Copy the DIMM presence map for the first two channels */ - res = i5k_channel_probe(&data->amb_present[0], - i5k_channel_pci_id(data, 0)); + res = i5k_channel_probe(&data->amb_present[0], chipset_ids[i].fbd0); if (res) goto err; /* Copy the DIMM presence map for the optional second two channels */ - i5k_channel_probe(&data->amb_present[2], - i5k_channel_pci_id(data, 1)); + i5k_channel_probe(&data->amb_present[2], chipset_ids[i].fbd0 + 1); /* Set up resource regions */ reso = request_mem_region(data->amb_base, data->amb_len, DRVNAME); -- cgit v1.1 From e8037d49835482c9534a9a07bed0d0ea831135ae Mon Sep 17 00:00:00 2001 From: Eric Seppanen Date: Tue, 23 Aug 2011 21:25:12 +0200 Subject: block: Fix queue_flag update when rq_affinity goes from 2 to 1 Commit 5757a6d76cdf added the QUEUE_FLAG_SAME_FORCE flag, but fails to clear that flag when the current state is '2' (SAME_COMP + SAME_FORCE) and the new state is '1' (SAME_COMP). Acked-by: Dan Williams Reviewed-by: Roland Dreier Signed-off-by: Eric Seppanen Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 0ee17b5..e681805 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -258,11 +258,13 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) ret = queue_var_store(&val, page, count); spin_lock_irq(q->queue_lock); - if (val) { + if (val == 2) { queue_flag_set(QUEUE_FLAG_SAME_COMP, q); - if (val == 2) - queue_flag_set(QUEUE_FLAG_SAME_FORCE, q); - } else { + queue_flag_set(QUEUE_FLAG_SAME_FORCE, q); + } else if (val == 1) { + queue_flag_set(QUEUE_FLAG_SAME_COMP, q); + queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); + } else if (val == 0) { queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); } -- cgit v1.1 From c2183d1e9b3f313dd8ba2b1b0197c8d9fb86a7ae Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 24 Aug 2011 10:20:17 +0200 Subject: fuse: check size of FUSE_NOTIFY_INVAL_ENTRY message FUSE_NOTIFY_INVAL_ENTRY didn't check the length of the write so the message processing could overrun and result in a "kernel BUG at fs/fuse/dev.c:629!" Reported-by: Han-Wen Nienhuys Signed-off-by: Miklos Szeredi CC: stable@kernel.org --- fs/fuse/dev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 640fc22..168a80f 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1358,6 +1358,10 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, if (outarg.namelen > FUSE_NAME_MAX) goto err; + err = -EINVAL; + if (size != sizeof(outarg) + outarg.namelen + 1) + goto err; + name.name = buf; name.len = outarg.namelen; err = fuse_copy_one(cs, buf, outarg.namelen + 1); -- cgit v1.1 From 0ebb962e00a52b644433065d224ed89f72a84756 Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Wed, 20 Jul 2011 15:43:42 +0100 Subject: ARM: 7003/1: vexpress: Add clock definition for the SP805. It seems that an entry for the SP805 watchdog in the table of clocks was missing. This results in the sp805_wdt driver rejecting the device with the following errors: sp805-wdt mb:wdt: Clock not found sp805-wdt mb:wdt: Probe Failed!!! sp805-wdt: probe of mb:wdt failed with error -2 While not obviously stated in the hardware docs, the onboard SP810's "REFCLK" is connected to a 32.768KHz crystal, and this drives the watchdog. Add a struct clk and corresponding lookup entry for it. Signed-off-by: Nick Bowler Signed-off-by: Russell King --- arch/arm/mach-vexpress/v2m.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c index 9e6b93b..d0d267a 100644 --- a/arch/arm/mach-vexpress/v2m.c +++ b/arch/arm/mach-vexpress/v2m.c @@ -318,6 +318,10 @@ static struct clk v2m_sp804_clk = { .rate = 1000000, }; +static struct clk v2m_ref_clk = { + .rate = 32768, +}; + static struct clk dummy_apb_pclk; static struct clk_lookup v2m_lookups[] = { @@ -348,6 +352,9 @@ static struct clk_lookup v2m_lookups[] = { }, { /* CLCD */ .dev_id = "mb:clcd", .clk = &osc1_clk, + }, { /* SP805 WDT */ + .dev_id = "mb:wdt", + .clk = &v2m_ref_clk, }, { /* SP804 timers */ .dev_id = "sp804", .con_id = "v2m-timer0", -- cgit v1.1 From 7675535958175b85b8117bcee245d9ecbc4d3d74 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 24 Aug 2011 10:53:10 +0200 Subject: ALSA: hda/conexant - Enable ADC-switching for auto-mic mode, too The ADC-switching can work also in the auto-mic mode, too. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 4c462c3..5616444 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -3866,7 +3866,7 @@ static void cx_auto_parse_input(struct hda_codec *codec) } if (imux->num_items >= 2 && cfg->num_inputs == imux->num_items) cx_auto_check_auto_mic(codec); - if (imux->num_items > 1 && !spec->auto_mic) { + if (imux->num_items > 1) { for (i = 1; i < imux->num_items; i++) { if (spec->imux_info[i].adc != spec->imux_info[0].adc) { spec->adc_switching = 1; -- cgit v1.1 From 52c49e0156e167fa65bbc3dd87a3a2f651af03fb Mon Sep 17 00:00:00 2001 From: Joseph Pentland Date: Tue, 23 Aug 2011 10:41:50 +0100 Subject: ASoC: Add Springbank I/O card to Speyside Kconfig Signed-off-by: Joseph Pentland Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/samsung/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/samsung/Kconfig b/sound/soc/samsung/Kconfig index b99091f..65f980e 100644 --- a/sound/soc/samsung/Kconfig +++ b/sound/soc/samsung/Kconfig @@ -185,6 +185,7 @@ config SND_SOC_SPEYSIDE select SND_SAMSUNG_I2S select SND_SOC_WM8996 select SND_SOC_WM9081 + select SND_SOC_WM1250_EV1 config SND_SOC_SPEYSIDE_WM8962 tristate "Audio support for Wolfson Speyside with WM8962" -- cgit v1.1 From 250b68512dd7e7d31a8c85a740a4b085bade4ba0 Mon Sep 17 00:00:00 2001 From: Sangbeom Kim Date: Tue, 23 Aug 2011 19:36:59 +0900 Subject: ASoC: Add samsung maintainer Signed-off-by: Sangbeom Kim Acked-by: Jassi Brar Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 46e3e6b..4f555d8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5532,6 +5532,7 @@ F: include/media/*7146* SAMSUNG AUDIO (ASoC) DRIVERS M: Jassi Brar +M: Sangbeom Kim L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Supported F: sound/soc/samsung -- cgit v1.1 From ee1a4d4b7fcfce31dade9f2ad333b34159cee799 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Tue, 23 Aug 2011 11:16:28 -0600 Subject: ASoC: Tegra: wm8903 machine driver: Drop Ventana support Board file support for Ventana is not yet mainlined, and probably won't ever be given the move to Device-Tree. Consequently, the Ventana entry is being removed from arch/arm/tools/mach-types in the next merge window, since it was registered over a year ago. This will also remove function machine_is_ventana(), which is used by the ASoC Tegra WM8903 machine driver. This will cause compilation failures. Drop Ventana support to resolve this. Hopefully, in the not-too-distant future, tegra_wm8903.c will be able to configure itself from Device-Tree, and hence we'll be able to re-instate Ventana support just by creating a .dts file for the board. Also note that Aebl support is in a similar boat. However, that board isn't scheduled for deprecation for at least another 5 months, and perhaps we will have completely removed non-Device-Tree support from tegra_wm8903.c by then and/or adjusted mach-types policy. Signed-off-by: Stephen Warren Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/tegra/tegra_wm8903.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/tegra/tegra_wm8903.c b/sound/soc/tegra/tegra_wm8903.c index 661373c..be27f1d 100644 --- a/sound/soc/tegra/tegra_wm8903.c +++ b/sound/soc/tegra/tegra_wm8903.c @@ -319,7 +319,7 @@ static int tegra_wm8903_init(struct snd_soc_pcm_runtime *rtd) snd_soc_dapm_force_enable_pin(dapm, "Mic Bias"); /* FIXME: Calculate automatically based on DAPM routes? */ - if (!machine_is_harmony() && !machine_is_ventana()) + if (!machine_is_harmony()) snd_soc_dapm_nc_pin(dapm, "IN1L"); if (!machine_is_seaboard() && !machine_is_aebl()) snd_soc_dapm_nc_pin(dapm, "IN1R"); @@ -395,7 +395,7 @@ static __devinit int tegra_wm8903_driver_probe(struct platform_device *pdev) platform_set_drvdata(pdev, card); snd_soc_card_set_drvdata(card, machine); - if (machine_is_harmony() || machine_is_ventana()) { + if (machine_is_harmony()) { card->dapm_routes = harmony_audio_map; card->num_dapm_routes = ARRAY_SIZE(harmony_audio_map); } else if (machine_is_seaboard()) { -- cgit v1.1 From c0764b2a4cdc41779460eb8796bc76e4fbddf339 Mon Sep 17 00:00:00 2001 From: Jean-Christophe PLAGNIOL-VILLARD Date: Tue, 23 Aug 2011 16:35:31 +0200 Subject: at91: at91sam9261.c: fix typo in t2_clk alias for atmel_tcb.0 This was a typo in clockdev declaration for at91sam9261 SoC. Fix the kernel hanging when switching clocksource to TC (tcb_clksrc). Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD Acked-by: Remy Bohmer Acked-by: Nicolas Ferre --- arch/arm/mach-at91/at91sam9261.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-at91/at91sam9261.c b/arch/arm/mach-at91/at91sam9261.c index d522b47..6c8e3b5 100644 --- a/arch/arm/mach-at91/at91sam9261.c +++ b/arch/arm/mach-at91/at91sam9261.c @@ -157,7 +157,7 @@ static struct clk_lookup periph_clocks_lookups[] = { CLKDEV_CON_DEV_ID("spi_clk", "atmel_spi.1", &spi1_clk), CLKDEV_CON_DEV_ID("t0_clk", "atmel_tcb.0", &tc0_clk), CLKDEV_CON_DEV_ID("t1_clk", "atmel_tcb.0", &tc1_clk), - CLKDEV_CON_DEV_ID("t2_clk", "atmel_tcb.0", &tc1_clk), + CLKDEV_CON_DEV_ID("t2_clk", "atmel_tcb.0", &tc2_clk), CLKDEV_CON_DEV_ID("pclk", "ssc.0", &ssc0_clk), CLKDEV_CON_DEV_ID("pclk", "ssc.1", &ssc1_clk), CLKDEV_CON_DEV_ID("pclk", "ssc.2", &ssc2_clk), -- cgit v1.1 From a63271627521b825b0dd0a564e9a9c62b4c1ca89 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 24 Aug 2011 16:04:32 +0200 Subject: block: change force plug flush call order Do blk_flush_plug_list() first and then add new request aDo blk_flush_plug_list() first and then add new request aDo blk_flush_plug_list() first and then add new request at the tail. New request can't be merged to existing requests, but later new requests might be merged with this new one. If blk_flush_plug_list() is done later, the merge doesn't happen. Believe it or not, this fixes a 10% regression running sysbench workload. Signed-off-by: Shaohua Li Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 90e1ffd..67dba69 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1302,11 +1302,11 @@ get_rq: if (__rq->q != q) plug->should_sort = 1; } - list_add_tail(&req->queuelist, &plug->list); - plug->count++; - drive_stat_acct(req, 1); if (plug->count >= BLK_MAX_REQUEST_COUNT) blk_flush_plug_list(plug, false); + plug->count++; + list_add_tail(&req->queuelist, &plug->list); + drive_stat_acct(req, 1); } else { spin_lock_irq(q->queue_lock); add_acct_request(q, req, where); -- cgit v1.1 From 56ebdaf2fa3c5276be201c5d1aff1490b682ecf2 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 24 Aug 2011 16:04:34 +0200 Subject: block: simplify force plug flush code a little bit Cleaning up the code a little bit. attempt_plug_merge() traverses the plug list anyway, we can do the request counting there, so stack size is reduced a little bit. The motivation here is I suspect if we should count the requests for each queue (task could handle multiple disks in the meantime), but my test doesn't show it's worthy doing. If somebody proves we should do it, below change will make that more easier. Signed-off-by: Shaohua Li Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-core.c | 13 +++++++------ include/linux/blkdev.h | 1 - 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 67dba69..b2ed78a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1167,7 +1167,7 @@ static bool bio_attempt_front_merge(struct request_queue *q, * true if merge was successful, otherwise false. */ static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, - struct bio *bio) + struct bio *bio, unsigned int *request_count) { struct blk_plug *plug; struct request *rq; @@ -1176,10 +1176,13 @@ static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, plug = tsk->plug; if (!plug) goto out; + *request_count = 0; list_for_each_entry_reverse(rq, &plug->list, queuelist) { int el_ret; + (*request_count)++; + if (rq->q != q) continue; @@ -1219,6 +1222,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) struct blk_plug *plug; int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT; struct request *req; + unsigned int request_count = 0; /* * low level driver can indicate that it wants pages above a @@ -1237,7 +1241,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) * Check if we can merge with the plugged list before grabbing * any locks. */ - if (attempt_plug_merge(current, q, bio)) + if (attempt_plug_merge(current, q, bio, &request_count)) goto out; spin_lock_irq(q->queue_lock); @@ -1302,9 +1306,8 @@ get_rq: if (__rq->q != q) plug->should_sort = 1; } - if (plug->count >= BLK_MAX_REQUEST_COUNT) + if (request_count >= BLK_MAX_REQUEST_COUNT) blk_flush_plug_list(plug, false); - plug->count++; list_add_tail(&req->queuelist, &plug->list); drive_stat_acct(req, 1); } else { @@ -2634,7 +2637,6 @@ void blk_start_plug(struct blk_plug *plug) INIT_LIST_HEAD(&plug->list); INIT_LIST_HEAD(&plug->cb_list); plug->should_sort = 0; - plug->count = 0; /* * If this is a nested plug, don't actually assign it. It will be @@ -2718,7 +2720,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) return; list_splice_init(&plug->list, &list); - plug->count = 0; if (plug->should_sort) { list_sort(NULL, &list, plug_rq_cmp); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 84b15d5..7fbaa91 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -873,7 +873,6 @@ struct blk_plug { struct list_head list; struct list_head cb_list; unsigned int should_sort; - unsigned int count; }; #define BLK_MAX_REQUEST_COUNT 16 -- cgit v1.1 From 27e7318c3e47e4fac71fcb472623434063ccc7a5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 24 Aug 2011 17:15:09 +0200 Subject: [S390] nss,initrd: kernel image and initrd must be in different segments When IPL'ing from a block device and an NSS should be created we must make sure that the kernel image and the initrd are in different 1MB segments. Otherwise creating the NSS will fail. So we make sure the initrd is 4MB behind the end of the kernel image like we do already when IPL via the VM reader is performed. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/early.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 068f846..f297456 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -396,17 +396,19 @@ static __init void detect_machine_facilities(void) static __init void rescue_initrd(void) { #ifdef CONFIG_BLK_DEV_INITRD + unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20); /* - * Move the initrd right behind the bss section in case it starts - * within the bss section. So we don't overwrite it when the bss - * section gets cleared. + * Just like in case of IPL from VM reader we make sure there is a + * gap of 4MB between end of kernel and start of initrd. + * That way we can also be sure that saving an NSS will succeed, + * which however only requires different segments. */ if (!INITRD_START || !INITRD_SIZE) return; - if (INITRD_START >= (unsigned long) __bss_stop) + if (INITRD_START >= min_initrd_addr) return; - memmove(__bss_stop, (void *) INITRD_START, INITRD_SIZE); - INITRD_START = (unsigned long) __bss_stop; + memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE); + INITRD_START = min_initrd_addr; #endif } -- cgit v1.1 From ba465d830ed1703713251917f154688ec537580f Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 24 Aug 2011 17:15:10 +0200 Subject: [S390] drivers/s390/block/dasd_ioctl.c: add missing kfree Data is only used to temporarily hold information to be copied to the user level, so it should be freed before leaving the function. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @exists@ local idexpression x; statement S,S1; expression E; identifier fl; expression *ptr != NULL; @@ x = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S <... when != x when != if (...) { <+...kfree(x)...+> } when any when != true x == NULL x->fl ...> ( if (x == NULL) S1 | if (...) { ... when != x when forall ( return \(0\|<+...x...+>\|ptr\); | * return ...; ) } ) // Signed-off-by: Julia Lawall Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_ioctl.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index eb4e034..f1a2016 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -249,6 +249,7 @@ static int dasd_ioctl_reset_profile(struct dasd_block *block) static int dasd_ioctl_read_profile(struct dasd_block *block, void __user *argp) { struct dasd_profile_info_t *data; + int rc = 0; data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) @@ -279,11 +280,14 @@ static int dasd_ioctl_read_profile(struct dasd_block *block, void __user *argp) spin_unlock_bh(&block->profile.lock); } else { spin_unlock_bh(&block->profile.lock); - return -EIO; + rc = -EIO; + goto out; } if (copy_to_user(argp, data, sizeof(*data))) - return -EFAULT; - return 0; + rc = -EFAULT; +out: + kfree(data); + return rc; } #else static int dasd_ioctl_reset_profile(struct dasd_block *block) -- cgit v1.1 From 798620fb1dd510d163f1c875c8422dc605f446da Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 24 Aug 2011 17:15:11 +0200 Subject: [S390] arch/s390/kernel/ipl.c: correct error detection check reipl_fcp_kset was just initialized, so it appears that it should be tested instead of reipl_kset. Signed-off-by: Julia Lawall Reported-by: Suman Saha Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ipl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 04361d5..ee28e06 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1220,7 +1220,7 @@ static int __init reipl_fcp_init(void) /* sysfs: create fcp kset for mixing attr group and bin attrs */ reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL, &reipl_kset->kobj); - if (!reipl_kset) { + if (!reipl_fcp_kset) { free_page((unsigned long) reipl_block_fcp); return -ENOMEM; } -- cgit v1.1 From e1202edadbf846f0a4de70c8c0b9fe5a6c88b1cb Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 24 Aug 2011 17:15:12 +0200 Subject: [S390] Change default action from reipl to stop for on_restart The main purpose for PSW restart will be kdump. Therefore customers will issue "system restart" for creating a dump. If kdump is not enabled, currently "PSW restart" will reboot the system and then no dump can be created any more. In order to still allow a manual stand-alone dump in the case a user issues "PSW restart" on a system that has not enabled kdump we now stop the system. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ipl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index ee28e06..48c7102 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1618,7 +1618,8 @@ static struct shutdown_action vmcmd_action = {SHUTDOWN_ACTION_VMCMD_STR, static void stop_run(struct shutdown_trigger *trigger) { - if (strcmp(trigger->name, ON_PANIC_STR) == 0) + if (strcmp(trigger->name, ON_PANIC_STR) == 0 || + strcmp(trigger->name, ON_RESTART_STR) == 0) disabled_wait((unsigned long) __builtin_return_address(0)); while (sigp(smp_processor_id(), sigp_stop) == sigp_busy) cpu_relax(); @@ -1717,7 +1718,7 @@ static void do_panic(void) /* on restart */ static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR, - &reipl_action}; + &stop_action}; static ssize_t on_restart_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) -- cgit v1.1 From 8adb4ca344b48bbbf87ca66fd07a2dd503619714 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 24 Aug 2011 17:15:13 +0200 Subject: [S390] memory hotplug: only unassign assigned increments Make sure that only assigned storage increments are unassigned when attaching a storage element. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/sclp_cmd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index be55fb2..837e010 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -383,8 +383,10 @@ static int sclp_attach_storage(u8 id) switch (sccb->header.response_code) { case 0x0020: set_bit(id, sclp_storage_ids); - for (i = 0; i < sccb->assigned; i++) - sclp_unassign_storage(sccb->entries[i] >> 16); + for (i = 0; i < sccb->assigned; i++) { + if (sccb->entries[i]) + sclp_unassign_storage(sccb->entries[i] >> 16); + } break; default: rc = -EIO; -- cgit v1.1 From 18036b5866b5e407a28f444a80de186a5d7df767 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 24 Aug 2011 16:35:32 +0100 Subject: ASoC: Correct element count for WM8996 sidetone HPF I can count. Honest. Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- sound/soc/codecs/wm8996.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8996.c b/sound/soc/codecs/wm8996.c index 0936ae5..0cdb9d1 100644 --- a/sound/soc/codecs/wm8996.c +++ b/sound/soc/codecs/wm8996.c @@ -420,7 +420,7 @@ static const char *sidetone_hpf_text[] = { }; static const struct soc_enum sidetone_hpf = - SOC_ENUM_SINGLE(WM8996_SIDETONE, 7, 6, sidetone_hpf_text); + SOC_ENUM_SINGLE(WM8996_SIDETONE, 7, 7, sidetone_hpf_text); static const char *hpf_mode_text[] = { "HiFi", "Custom", "Voice" -- cgit v1.1 From f1c39625d63c9f8eba8f036429c10a9cb9e32920 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 24 Aug 2011 09:54:24 -0700 Subject: xen: use non-tracing preempt in xen_clocksource_read() The tracing code used sched_clock() to get tracing timestamps, which ends up calling xen_clocksource_read(). xen_clocksource_read() must disable preemption, but if preemption tracing is enabled, this results in infinite recursion. I've only noticed this when boot-time tracing tests are enabled, but it seems like a generic bug. It looks like it would also affect kvm_clocksource_read(). Reported-by: Konrad Rzeszutek Wilk Signed-off-by: Jeremy Fitzhardinge Cc: Avi Kivity Cc: Marcelo Tosatti --- arch/x86/xen/time.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 5158c50..163b467 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -168,9 +168,10 @@ cycle_t xen_clocksource_read(void) struct pvclock_vcpu_time_info *src; cycle_t ret; - src = &get_cpu_var(xen_vcpu)->time; + preempt_disable_notrace(); + src = &__get_cpu_var(xen_vcpu)->time; ret = pvclock_clocksource_read(src); - put_cpu_var(xen_vcpu); + preempt_enable_notrace(); return ret; } -- cgit v1.1 From 66cb54bd24086b2d871a03035de9b0e79b2b725e Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Wed, 24 Aug 2011 00:44:32 +0400 Subject: carl9170: Fix mismatch in carl9170_op_set_key mutex lock-unlock If is_main_vif(ar, vif) reports that we have to fall back to software encryption, we goto err_softw; before locking ar->mutex. As a result, we have unprotected call to carl9170_set_operating_mode and unmatched mutex_unlock. The patch fix the issue by adding mutex_lock before goto. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Cc: Acked-By: Christian Lamparter Signed-off-by: John W. Linville --- drivers/net/wireless/ath/carl9170/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 0122930..0474e663 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1066,8 +1066,10 @@ static int carl9170_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, * the high througput speed in 802.11n networks. */ - if (!is_main_vif(ar, vif)) + if (!is_main_vif(ar, vif)) { + mutex_lock(&ar->mutex); goto err_softw; + } /* * While the hardware supports *catch-all* key, for offloading -- cgit v1.1 From 8b2a3827bb12430d932cd479b22d906baf08c212 Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Wed, 24 Aug 2011 21:38:07 +0530 Subject: ath9k: Fix PS wrappers in ath9k_set_coverage_class this callback is called during suspend/resume and also via iw command. it configures parameters like sifs, slottime, acktimeout in ath9k_hw_init_global_settings where few REG_READ, REG_RMW are also done and hence the need for PS wrappers Cc: stable@kernel.org Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 9098aaa..6530694 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -2283,7 +2283,11 @@ static void ath9k_set_coverage_class(struct ieee80211_hw *hw, u8 coverage_class) mutex_lock(&sc->mutex); ah->coverage_class = coverage_class; + + ath9k_ps_wakeup(sc); ath9k_hw_init_global_settings(ah); + ath9k_ps_restore(sc); + mutex_unlock(&sc->mutex); } -- cgit v1.1 From b569ad34926defcff998f214afeb260331165985 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 24 Aug 2011 15:07:35 -0400 Subject: NFSv4: nfs4_proc_async_renew should use a GFP_NOFS allocation We shouldn't allow the renew daemon to do direct reclaim on the NFS partition. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8c77039..776b41a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3397,7 +3397,7 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) if (!atomic_inc_not_zero(&clp->cl_count)) return -EIO; - data = kmalloc(sizeof(*data), GFP_KERNEL); + data = kmalloc(sizeof(*data), GFP_NOFS); if (data == NULL) return -ENOMEM; data->client = clp; -- cgit v1.1 From 8534d4ec055d854be6c94e8e5654fa87678ea5f7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 24 Aug 2011 15:07:37 -0400 Subject: NFSv4: nfs4_proc_renew should be declared static Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 -- fs/nfs/nfs4proc.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 1ec1a85..58adfb6 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -237,8 +237,6 @@ extern const struct inode_operations nfs4_dir_inode_operations; extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); -extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); -extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 776b41a..b358ec1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3386,7 +3386,7 @@ static const struct rpc_call_ops nfs4_renew_ops = { .rpc_release = nfs4_renew_release, }; -int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) +static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], @@ -3406,7 +3406,7 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) &nfs4_renew_ops, data); } -int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) +static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], -- cgit v1.1 From 2f60ea6b8ceda61ae08bef71a652eac36ec193b3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 24 Aug 2011 15:07:37 -0400 Subject: NFSv4: The NFSv4.0 client must send RENEW calls if it holds a delegation RFC3530 states that if the client holds a delegation, then it is obliged to continue to send RENEW calls once every lease period in order to allow the server to return NFS4ERR_CB_PATH_DOWN if the callback path is unreachable. This is not required for NFSv4.1, since the server can at any time set the SEQ4_STATUS_CB_PATH_DOWN_SESSION in any SEQUENCE operation. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 5 ++++- fs/nfs/nfs4proc.c | 8 ++++++-- fs/nfs/nfs4renewd.c | 12 +++++++++--- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 58adfb6..e1b6607 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -56,6 +56,9 @@ enum nfs4_session_state { NFS4_SESSION_DRAINING, }; +#define NFS4_RENEW_TIMEOUT 0x01 +#define NFS4_RENEW_DELEGATION_CB 0x02 + struct nfs4_minor_version_ops { u32 minor_version; @@ -225,7 +228,7 @@ struct nfs4_state_recovery_ops { }; struct nfs4_state_maintenance_ops { - int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *); + int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *, unsigned); struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *); int (*renew_lease)(struct nfs_client *, struct rpc_cred *); }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b358ec1..e89940a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3386,7 +3386,7 @@ static const struct rpc_call_ops nfs4_renew_ops = { .rpc_release = nfs4_renew_release, }; -static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) +static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], @@ -3395,6 +3395,8 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) }; struct nfs4_renewdata *data; + if (renew_flags == 0) + return 0; if (!atomic_inc_not_zero(&clp->cl_count)) return -EIO; data = kmalloc(sizeof(*data), GFP_NOFS); @@ -5504,11 +5506,13 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ return rpc_run_task(&task_setup_data); } -static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred) +static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) { struct rpc_task *task; int ret = 0; + if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) + return 0; task = _nfs41_proc_sequence(clp, cred); if (IS_ERR(task)) ret = PTR_ERR(task); diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index df8e7f3..dc484c0 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -60,6 +60,7 @@ nfs4_renew_state(struct work_struct *work) struct rpc_cred *cred; long lease; unsigned long last, now; + unsigned renew_flags = 0; ops = clp->cl_mvops->state_renewal_ops; dprintk("%s: start\n", __func__); @@ -72,18 +73,23 @@ nfs4_renew_state(struct work_struct *work) last = clp->cl_last_renewal; now = jiffies; /* Are we close to a lease timeout? */ - if (time_after(now, last + lease/3)) { + if (time_after(now, last + lease/3)) + renew_flags |= NFS4_RENEW_TIMEOUT; + if (nfs_delegations_present(clp)) + renew_flags |= NFS4_RENEW_DELEGATION_CB; + + if (renew_flags != 0) { cred = ops->get_state_renewal_cred_locked(clp); spin_unlock(&clp->cl_lock); if (cred == NULL) { - if (!nfs_delegations_present(clp)) { + if (!(renew_flags & NFS4_RENEW_DELEGATION_CB)) { set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); goto out; } nfs_expire_all_delegations(clp); } else { /* Queue an asynchronous RENEW. */ - ops->sched_state_renewal(clp, cred); + ops->sched_state_renewal(clp, cred, renew_flags); put_rpccred(cred); goto out_exp; } -- cgit v1.1 From 042b60beb410caf68f576d63d6849d0f0a545eb0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 24 Aug 2011 15:07:37 -0400 Subject: NFSv4: renewd needs to be able to handle the NFS4ERR_CB_PATH_DOWN error The NFSv4 spec does not specify that the server must repeat that error, so in order to avoid having the delegations revoked, we should handle it immediately. Also note that NFS4ERR_CB_PATH_DOWN does in fact renew the lease... Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 8 ++++++-- fs/nfs/nfs4state.c | 6 ++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index e1b6607..3e93e9a 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -350,6 +350,7 @@ extern void nfs4_close_sync(struct nfs4_state *, fmode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); extern void nfs4_schedule_lease_recovery(struct nfs_client *); extern void nfs4_schedule_state_manager(struct nfs_client *); +extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); extern void nfs41_handle_recall_slot(struct nfs_client *clp); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e89940a..4700fae 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3374,9 +3374,13 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata) if (task->tk_status < 0) { /* Unless we're shutting down, schedule state recovery! */ - if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) + if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) == 0) + return; + if (task->tk_status != NFS4ERR_CB_PATH_DOWN) { nfs4_schedule_lease_recovery(clp); - return; + return; + } + nfs4_schedule_path_down_recovery(clp); } do_renew_lease(clp, timestamp); } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 72ab97e..39914be 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1038,6 +1038,12 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp) nfs4_schedule_state_manager(clp); } +void nfs4_schedule_path_down_recovery(struct nfs_client *clp) +{ + nfs_handle_cb_pathdown(clp); + nfs4_schedule_state_manager(clp); +} + static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) { -- cgit v1.1 From 3bdf28feafc52864bd7f17b39deec64833a89d19 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Tue, 23 Aug 2011 16:48:26 -0500 Subject: ASoC: MPC5200: replace of_device with platform_device 'struct of_device' no longer exists, and its functionality has been merged into platform_device. Update the MPC5200 audio DMA driver (mpc5200_dma) accordingly. This fixes a build break. Signed-off-by: Timur Tabi Acked-by: Liam Girdwood Signed-off-by: Mark Brown Cc: stable@kernel.org --- sound/soc/fsl/mpc5200_dma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/fsl/mpc5200_dma.c b/sound/soc/fsl/mpc5200_dma.c index fd0dc46..5c6c245 100644 --- a/sound/soc/fsl/mpc5200_dma.c +++ b/sound/soc/fsl/mpc5200_dma.c @@ -369,7 +369,7 @@ static struct snd_soc_platform_driver mpc5200_audio_dma_platform = { .pcm_free = &psc_dma_free, }; -static int mpc5200_hpcd_probe(struct of_device *op) +static int mpc5200_hpcd_probe(struct platform_device *op) { phys_addr_t fifo; struct psc_dma *psc_dma; @@ -487,7 +487,7 @@ out_unmap: return ret; } -static int mpc5200_hpcd_remove(struct of_device *op) +static int mpc5200_hpcd_remove(struct platform_device *op) { struct psc_dma *psc_dma = dev_get_drvdata(&op->dev); @@ -519,7 +519,7 @@ MODULE_DEVICE_TABLE(of, mpc5200_hpcd_match); static struct platform_driver mpc5200_hpcd_of_driver = { .probe = mpc5200_hpcd_probe, .remove = mpc5200_hpcd_remove, - .dev = { + .driver = { .owner = THIS_MODULE, .name = "mpc5200-pcm-audio", .of_match_table = mpc5200_hpcd_match, -- cgit v1.1 From b7ab83edba2d50583bc9520431618489379718b2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 24 Aug 2011 21:40:56 +0200 Subject: PM: Use spinlock instead of mutex in clock management functions The lock member of struct pm_clk_data is of type struct mutex, which is a problem, because the suspend and resume routines defined in drivers/base/power/clock_ops.c cannot be executed with interrupts disabled for this reason. Modify struct pm_clk_data so that its lock member is a spinlock. Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm --- drivers/base/power/clock_ops.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index a846b2f..2c18d58 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -19,7 +19,7 @@ struct pm_clk_data { struct list_head clock_list; - struct mutex lock; + spinlock_t lock; }; enum pce_status { @@ -73,9 +73,9 @@ int pm_clk_add(struct device *dev, const char *con_id) } } - mutex_lock(&pcd->lock); + spin_lock_irq(&pcd->lock); list_add_tail(&ce->node, &pcd->clock_list); - mutex_unlock(&pcd->lock); + spin_unlock_irq(&pcd->lock); return 0; } @@ -83,8 +83,8 @@ int pm_clk_add(struct device *dev, const char *con_id) * __pm_clk_remove - Destroy PM clock entry. * @ce: PM clock entry to destroy. * - * This routine must be called under the mutex protecting the PM list of clocks - * corresponding the the @ce's device. + * This routine must be called under the spinlock protecting the PM list of + * clocks corresponding the the @ce's device. */ static void __pm_clk_remove(struct pm_clock_entry *ce) { @@ -123,7 +123,7 @@ void pm_clk_remove(struct device *dev, const char *con_id) if (!pcd) return; - mutex_lock(&pcd->lock); + spin_lock_irq(&pcd->lock); list_for_each_entry(ce, &pcd->clock_list, node) { if (!con_id && !ce->con_id) { @@ -137,7 +137,7 @@ void pm_clk_remove(struct device *dev, const char *con_id) } } - mutex_unlock(&pcd->lock); + spin_unlock_irq(&pcd->lock); } /** @@ -158,7 +158,7 @@ int pm_clk_init(struct device *dev) } INIT_LIST_HEAD(&pcd->clock_list); - mutex_init(&pcd->lock); + spin_lock_init(&pcd->lock); dev->power.subsys_data = pcd; return 0; } @@ -181,12 +181,12 @@ void pm_clk_destroy(struct device *dev) dev->power.subsys_data = NULL; - mutex_lock(&pcd->lock); + spin_lock_irq(&pcd->lock); list_for_each_entry_safe_reverse(ce, c, &pcd->clock_list, node) __pm_clk_remove(ce); - mutex_unlock(&pcd->lock); + spin_unlock_irq(&pcd->lock); kfree(pcd); } @@ -220,13 +220,14 @@ int pm_clk_suspend(struct device *dev) { struct pm_clk_data *pcd = __to_pcd(dev); struct pm_clock_entry *ce; + unsigned long flags; dev_dbg(dev, "%s()\n", __func__); if (!pcd) return 0; - mutex_lock(&pcd->lock); + spin_lock_irqsave(&pcd->lock, flags); list_for_each_entry_reverse(ce, &pcd->clock_list, node) { if (ce->status == PCE_STATUS_NONE) @@ -238,7 +239,7 @@ int pm_clk_suspend(struct device *dev) } } - mutex_unlock(&pcd->lock); + spin_unlock_irqrestore(&pcd->lock, flags); return 0; } @@ -251,13 +252,14 @@ int pm_clk_resume(struct device *dev) { struct pm_clk_data *pcd = __to_pcd(dev); struct pm_clock_entry *ce; + unsigned long flags; dev_dbg(dev, "%s()\n", __func__); if (!pcd) return 0; - mutex_lock(&pcd->lock); + spin_lock_irqsave(&pcd->lock, flags); list_for_each_entry(ce, &pcd->clock_list, node) { if (ce->status == PCE_STATUS_NONE) @@ -269,7 +271,7 @@ int pm_clk_resume(struct device *dev) } } - mutex_unlock(&pcd->lock); + spin_unlock_irqrestore(&pcd->lock, flags); return 0; } @@ -344,6 +346,7 @@ int pm_clk_suspend(struct device *dev) { struct pm_clk_data *pcd = __to_pcd(dev); struct pm_clock_entry *ce; + unsigned long flags; dev_dbg(dev, "%s()\n", __func__); @@ -351,12 +354,12 @@ int pm_clk_suspend(struct device *dev) if (!pcd || !dev->driver) return 0; - mutex_lock(&pcd->lock); + spin_lock_irqsave(&pcd->lock, flags); list_for_each_entry_reverse(ce, &pcd->clock_list, node) clk_disable(ce->clk); - mutex_unlock(&pcd->lock); + spin_unlock_irqrestore(&pcd->lock, flags); return 0; } @@ -369,6 +372,7 @@ int pm_clk_resume(struct device *dev) { struct pm_clk_data *pcd = __to_pcd(dev); struct pm_clock_entry *ce; + unsigned long flags; dev_dbg(dev, "%s()\n", __func__); @@ -376,12 +380,12 @@ int pm_clk_resume(struct device *dev) if (!pcd || !dev->driver) return 0; - mutex_lock(&pcd->lock); + spin_lock_irqsave(&pcd->lock, flags); list_for_each_entry(ce, &pcd->clock_list, node) clk_enable(ce->clk); - mutex_unlock(&pcd->lock); + spin_unlock_irqrestore(&pcd->lock, flags); return 0; } -- cgit v1.1 From 5a50a01bf00c8191073fdf518e1af1e950ac3af5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 24 Aug 2011 21:41:08 +0200 Subject: sh-sci / PM: Use power.irq_safe Since sci_port_enable() and sci_port_disable() may be run with interrupts off and they execute pm_runtime_get_sync() and pm_runtime_put_sync(), respectively, the SCI device's power.irq_safe flag has to be set to indicate that it is safe to execute runtime PM callbacks for this device with interrupts off. Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm --- drivers/tty/serial/sh-sci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 2ec57b2..a9414fa 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1913,6 +1913,7 @@ static int __devinit sci_init_single(struct platform_device *dev, port->dev = &dev->dev; + pm_runtime_irq_safe(&dev->dev); pm_runtime_enable(&dev->dev); } -- cgit v1.1 From 5c3f96b20954fd6932bcfb1a860fa1d8b5b22ab0 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 24 Aug 2011 22:38:43 +0200 Subject: ARM: mach-shmobile: sh7372 LCDC1 suspend fix Associate the HDMI clock together with LCDC1 on sh7372. Without this patch Suspend-to-RAM hangs on the boards AP4EVB and Mackerel. The code hangs in the LCDC driver where the software is waiting forever for the hardware to power down. By explicitly associating the HDMI clock with LCDC1 we can make sure the HDMI clock is enabled using Runtime PM whenever the driver is accessing the hardware. This HDMI and LCDC1 dependency is documented in the sh7372 data sheet. Older kernels did work as expected but the recently merged (3.1-rc) 794d78f drivers: sh: late disabling of clocks V2 introduced code to turn off clocks lacking software reference which happens to include the HDMI clock that is needed by LCDC1 to operate as expected. Signed-off-by: Magnus Damm Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-shmobile/board-ap4evb.c | 1 + arch/arm/mach-shmobile/board-mackerel.c | 1 + arch/arm/mach-shmobile/clock-sh7372.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index 9e0856b..fadbe5b 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -1412,6 +1412,7 @@ static void __init ap4evb_init(void) fsi_init_pm_clock(); sh7372_pm_init(); pm_clk_add(&fsi_device.dev, "spu2"); + pm_clk_add(&hdmi_lcdc_device.dev, "hdmi"); } static void __init ap4evb_timer_init(void) diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index d41c01f..0ea71f8 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -1588,6 +1588,7 @@ static void __init mackerel_init(void) hdmi_init_pm_clock(); sh7372_pm_init(); pm_clk_add(&fsi_device.dev, "spu2"); + pm_clk_add(&hdmi_lcdc_device.dev, "hdmi"); } static void __init mackerel_timer_init(void) diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 6b1619a..e6e11e4 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -655,6 +655,8 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("renesas_usbhs.1", &mstp_clks[MSTP406]), /* USB1 */ CLKDEV_DEV_ID("sh_keysc.0", &mstp_clks[MSTP403]), /* KEYSC */ + CLKDEV_ICK_ID("hdmi", "sh_mobile_lcdc_fb.1", + &div6_reparent_clks[DIV6_HDMI]), CLKDEV_ICK_ID("ick", "sh-mobile-hdmi", &div6_reparent_clks[DIV6_HDMI]), CLKDEV_ICK_ID("icka", "sh_fsi2", &div6_reparent_clks[DIV6_FSIA]), CLKDEV_ICK_ID("ickb", "sh_fsi2", &div6_reparent_clks[DIV6_FSIB]), -- cgit v1.1 From 1b965f1891eac2d8583b5248ef0bcbc91c201e27 Mon Sep 17 00:00:00 2001 From: Omar Ramirez Luna Date: Wed, 24 Aug 2011 15:07:04 -0500 Subject: staging: tidspbridge: fix compilation on dsp clock functions Seen on v3.1-rc3, patch: omap: mcbsp: Drop in-driver transfer support bafe2721a0fbd1cc1af04384133684f660f3658e Removed code that now cause tidspbridge to break while compiling. Signed-off-by: Omar Ramirez Luna Signed-off-by: Greg Kroah-Hartman --- drivers/staging/tidspbridge/core/dsp-clock.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/tidspbridge/core/dsp-clock.c b/drivers/staging/tidspbridge/core/dsp-clock.c index 589a055..3d1279c 100644 --- a/drivers/staging/tidspbridge/core/dsp-clock.c +++ b/drivers/staging/tidspbridge/core/dsp-clock.c @@ -209,7 +209,6 @@ int dsp_clk_enable(enum dsp_clk_id clk_id) break; #ifdef CONFIG_OMAP_MCBSP case MCBSP_CLK: - omap_mcbsp_set_io_type(MCBSP_ID(clk_id), OMAP_MCBSP_POLL_IO); omap_mcbsp_request(MCBSP_ID(clk_id)); omap2_mcbsp_set_clks_src(MCBSP_ID(clk_id), MCBSP_CLKS_PAD_SRC); break; -- cgit v1.1 From c8d47631a48f254d062db8084776d1fb24785e7b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 9 Aug 2011 20:17:29 +0200 Subject: i2c-nomadik: fix kerneldoc warning There was a missing struct item in the kerneldoc, add it and fix another pretty-printing formatting issue with a missing space. Signed-off-by: Linus Walleij Signed-off-by: Ben Dooks --- drivers/i2c/busses/i2c-nomadik.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c index f9b8854..b228e09 100644 --- a/drivers/i2c/busses/i2c-nomadik.c +++ b/drivers/i2c/busses/i2c-nomadik.c @@ -146,6 +146,7 @@ struct i2c_nmk_client { * @stop: stop condition * @xfer_complete: acknowledge completion for a I2C message * @result: controller propogated result + * @regulator: pointer to i2c regulator * @busy: Busy doing transfer */ struct nmk_i2c_dev { @@ -509,7 +510,7 @@ static int write_i2c(struct nmk_i2c_dev *dev) if (timeout < 0) { dev_err(&dev->pdev->dev, - "wait_for_completion_timeout" + "wait_for_completion_timeout " "returned %d waiting for event\n", timeout); status = timeout; } -- cgit v1.1 From caca9510ff4e5d842c0589110243d60927836222 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 24 Aug 2011 15:55:30 -0700 Subject: firmware loader: allow builtin firmware load even if usermodehelper is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit a144c6a6c924 ("PM: Print a warning if firmware is requested when tasks are frozen") we not only printed a warning if somebody tried to load the firmware when tasks are frozen - we also failed the load. But that check was done before the check for built-in firmware, and then when we disallowed usermode helpers during bootup (commit 288d5abec831: "Boot up with usermodehelper disabled"), that actually means that built-in modules can no longer load their firmware even if the firmware is built in too. Which used to work, and some people depended on it for the R100 driver. So move the test for usermodehelper_is_disabled() down, to after checking the built-in firmware. This should fix: https://bugzilla.kernel.org/show_bug.cgi?id=40952 Reported-by: James Cloos Bisected-by: Elimar Riesebieter Cc: Michel Dänzer Cc: Rafael Wysocki Cc: Greg Kroah-Hartman Cc: Valdis Kletnieks Signed-off-by: Linus Torvalds --- drivers/base/firmware_class.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index bbb03e6..06ed6b4 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -521,11 +521,6 @@ static int _request_firmware(const struct firmware **firmware_p, if (!firmware_p) return -EINVAL; - if (WARN_ON(usermodehelper_is_disabled())) { - dev_err(device, "firmware: %s will not be loaded\n", name); - return -EBUSY; - } - *firmware_p = firmware = kzalloc(sizeof(*firmware), GFP_KERNEL); if (!firmware) { dev_err(device, "%s: kmalloc(struct firmware) failed\n", @@ -539,6 +534,12 @@ static int _request_firmware(const struct firmware **firmware_p, return 0; } + if (WARN_ON(usermodehelper_is_disabled())) { + dev_err(device, "firmware: %s will not be loaded\n", name); + retval = -EBUSY; + goto out; + } + if (uevent) dev_dbg(device, "firmware: requesting %s\n", name); -- cgit v1.1 From 814fd609fa98f3667974d8c27c4d75ef4ce041ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20de=20Peslo=C3=BCan?= Date: Tue, 23 Aug 2011 23:31:42 +0000 Subject: MAINTAINERS: Update GIT trees for network development MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove -2.6 from net and net-next tree names. Signed-off-by: Nicolas de Pesloüan Signed-off-by: David S. Miller --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 069ee3b..c94a898 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4450,8 +4450,8 @@ M: "David S. Miller" L: netdev@vger.kernel.org W: http://www.linuxfoundation.org/en/Net W: http://patchwork.ozlabs.org/project/netdev/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git S: Maintained F: net/ F: include/net/ -- cgit v1.1 From e05c4ad3ed874ee4f5e2c969e55d318ec654332c Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 23 Aug 2011 22:54:37 +0000 Subject: mcast: Fix source address selection for multicast listener report Should check use count of include mode filter instead of total number of include mode filters. Signed-off-by: Zheng Yan Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 2 +- net/ipv6/mcast.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 283c0a2..d577199 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -767,7 +767,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) break; for (i=0; isfcount[MCAST_INCLUDE] || + if (psf->sf_count[MCAST_INCLUDE] || pmc->sfcount[MCAST_EXCLUDE] != psf->sf_count[MCAST_EXCLUDE]) continue; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 3e6ebcd..ee7839f 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1059,7 +1059,7 @@ static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, break; for (i=0; imca_sfcount[MCAST_INCLUDE] || + if (psf->sf_count[MCAST_INCLUDE] || pmc->mca_sfcount[MCAST_EXCLUDE] != psf->sf_count[MCAST_EXCLUDE]) continue; -- cgit v1.1 From 4b275d7efa1c4412f0d572fcd7f78ed0919370b3 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 23 Aug 2011 22:54:33 +0000 Subject: bridge: Pseudo-header required for the checksum of ICMPv6 Checksum of ICMPv6 is not properly computed because the pseudo header is not used. Thus, the MLD packet gets dropped by the bridge. Signed-off-by: Zheng Yan Reported-by: Ang Way Chuang Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2d85ca7..22d2d1a 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1520,16 +1520,23 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, err = pskb_trim_rcsum(skb2, len); if (err) goto out; + err = -EINVAL; } + ip6h = ipv6_hdr(skb2); + switch (skb2->ip_summed) { case CHECKSUM_COMPLETE: - if (!csum_fold(skb2->csum)) + if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb2->len, + IPPROTO_ICMPV6, skb2->csum)) break; /*FALLTHROUGH*/ case CHECKSUM_NONE: - skb2->csum = 0; - if (skb_checksum_complete(skb2)) + skb2->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr, + &ip6h->daddr, + skb2->len, + IPPROTO_ICMPV6, 0)); + if (__skb_checksum_complete(skb2)) goto out; } -- cgit v1.1 From 22df13319d1fec30b8f9bcaadc295829647109bb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Aug 2011 19:57:05 +0000 Subject: bridge: fix a possible use after free br_multicast_ipv6_rcv() can call pskb_trim_rcsum() and therefore skb head can be reallocated. Cache icmp6_type field instead of dereferencing twice the struct icmp6hdr pointer. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 22d2d1a..995cbe0 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1456,7 +1456,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, { struct sk_buff *skb2; const struct ipv6hdr *ip6h; - struct icmp6hdr *icmp6h; + u8 icmp6_type; u8 nexthdr; unsigned len; int offset; @@ -1502,9 +1502,9 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, __skb_pull(skb2, offset); skb_reset_transport_header(skb2); - icmp6h = icmp6_hdr(skb2); + icmp6_type = icmp6_hdr(skb2)->icmp6_type; - switch (icmp6h->icmp6_type) { + switch (icmp6_type) { case ICMPV6_MGM_QUERY: case ICMPV6_MGM_REPORT: case ICMPV6_MGM_REDUCTION: @@ -1544,7 +1544,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, BR_INPUT_SKB_CB(skb)->igmp = 1; - switch (icmp6h->icmp6_type) { + switch (icmp6_type) { case ICMPV6_MGM_REPORT: { struct mld_msg *mld; -- cgit v1.1 From 20e6074eb8e096b3a595c093d1cb222f378cd671 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Aug 2011 19:32:42 +0000 Subject: arp: fix rcu lockdep splat in arp_process() Dave Jones reported a lockdep splat triggered by an arp_process() call from parp_redo(). Commit faa9dcf793be (arp: RCU changes) is the origin of the bug, since it assumed arp_process() was called under rcu_read_lock(), which is not true in this particular path. Instead of adding rcu_read_lock() in parp_redo(), I chose to add it in neigh_proxy_process() to take care of IPv6 side too. =================================================== [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- include/linux/inetdevice.h:209 invoked rcu_dereference_check() without protection! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 4 locks held by setfiles/2123: #0: (&sb->s_type->i_mutex_key#13){+.+.+.}, at: [] walk_component+0x1ef/0x3e8 #1: (&isec->lock){+.+.+.}, at: [] inode_doinit_with_dentry+0x3f/0x41f #2: (&tbl->proxy_timer){+.-...}, at: [] run_timer_softirq+0x157/0x372 #3: (class){+.-...}, at: [] neigh_proxy_process +0x36/0x103 stack backtrace: Pid: 2123, comm: setfiles Tainted: G W 3.1.0-0.rc2.git7.2.fc16.x86_64 #1 Call Trace: [] lockdep_rcu_dereference+0xa7/0xaf [] __in_dev_get_rcu+0x55/0x5d [] arp_process+0x25/0x4d7 [] parp_redo+0xe/0x10 [] neigh_proxy_process+0x9a/0x103 [] run_timer_softirq+0x218/0x372 [] ? run_timer_softirq+0x157/0x372 [] ? neigh_stat_seq_open+0x41/0x41 [] ? mark_held_locks+0x6d/0x95 [] __do_softirq+0x112/0x25a [] call_softirq+0x1c/0x30 [] do_softirq+0x4b/0xa2 [] irq_exit+0x5d/0xcf [] smp_apic_timer_interrupt+0x7c/0x8a [] apic_timer_interrupt+0x73/0x80 [] ? trace_hardirqs_on_caller+0x121/0x158 [] ? __slab_free+0x30/0x24c [] ? __slab_free+0x2e/0x24c [] ? inode_doinit_with_dentry+0x2e9/0x41f [] ? inode_doinit_with_dentry+0x2e9/0x41f [] ? inode_doinit_with_dentry+0x2e9/0x41f [] kfree+0x108/0x131 [] inode_doinit_with_dentry+0x2e9/0x41f [] selinux_d_instantiate+0x1c/0x1e [] security_d_instantiate+0x21/0x23 [] d_instantiate+0x5c/0x61 [] d_splice_alias+0xbc/0xd2 [] ext4_lookup+0xba/0xeb [] d_alloc_and_lookup+0x45/0x6b [] walk_component+0x215/0x3e8 [] lookup_last+0x3b/0x3d [] path_lookupat+0x82/0x2af [] ? might_fault+0xa5/0xac [] ? might_fault+0x5c/0xac [] ? getname_flags+0x31/0x1ca [] do_path_lookup+0x28/0x97 [] user_path_at+0x59/0x96 [] ? cp_new_stat+0xf7/0x10d [] vfs_fstatat+0x44/0x6e [] vfs_lstat+0x1e/0x20 [] sys_newlstat+0x1a/0x33 [] ? trace_hardirqs_on_caller+0x121/0x158 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x16/0x1b Reported-by: Dave Jones Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/neighbour.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8fab9b0..1334d7e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1319,11 +1319,15 @@ static void neigh_proxy_process(unsigned long arg) if (tdif <= 0) { struct net_device *dev = skb->dev; + __skb_unlink(skb, &tbl->proxy_queue); - if (tbl->proxy_redo && netif_running(dev)) + if (tbl->proxy_redo && netif_running(dev)) { + rcu_read_lock(); tbl->proxy_redo(skb); - else + rcu_read_unlock(); + } else { kfree_skb(skb); + } dev_put(dev); } else if (!sched_next || tdif < sched_next) -- cgit v1.1 From c6f59d13e24187ff95427a9f4a5a7e14fb8faf5a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 24 Aug 2011 17:56:15 -0700 Subject: ibmveth: Fix leak when recycling skb and hypervisor returns error If h_add_logical_lan_buffer returns an error we need to free the skb. Signed-off-by: Anton Blanchard Cc: stable Signed-off-by: David S. Miller --- drivers/net/ibmveth.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index ba99af0..3e66792 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -395,7 +395,7 @@ static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *ada } /* recycle the current buffer on the rx queue */ -static void ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter) +static int ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter) { u32 q_index = adapter->rx_queue.index; u64 correlator = adapter->rx_queue.queue_addr[q_index].correlator; @@ -403,6 +403,7 @@ static void ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter) unsigned int index = correlator & 0xffffffffUL; union ibmveth_buf_desc desc; unsigned long lpar_rc; + int ret = 1; BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); BUG_ON(index >= adapter->rx_buff_pool[pool].size); @@ -410,7 +411,7 @@ static void ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter) if (!adapter->rx_buff_pool[pool].active) { ibmveth_rxq_harvest_buffer(adapter); ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[pool]); - return; + goto out; } desc.fields.flags_len = IBMVETH_BUF_VALID | @@ -423,12 +424,16 @@ static void ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter) netdev_dbg(adapter->netdev, "h_add_logical_lan_buffer failed " "during recycle rc=%ld", lpar_rc); ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator); + ret = 0; } if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) { adapter->rx_queue.index = 0; adapter->rx_queue.toggle = !adapter->rx_queue.toggle; } + +out: + return ret; } static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter) @@ -1084,8 +1089,9 @@ restart_poll: if (rx_flush) ibmveth_flush_buffer(skb->data, length + offset); + if (!ibmveth_rxq_recycle_buffer(adapter)) + kfree_skb(skb); skb = new_skb; - ibmveth_rxq_recycle_buffer(adapter); } else { ibmveth_rxq_harvest_buffer(adapter); skb_reserve(skb, offset); -- cgit v1.1 From bc909d9ddbf7778371e36a651d6e4194b1cc7d4c Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 24 Aug 2011 19:45:03 -0700 Subject: sendmmsg/sendmsg: fix unsafe user pointer access Dereferencing a user pointer directly from kernel-space without going through the copy_from_user family of functions is a bad idea. Two of such usages can be found in the sendmsg code path called from sendmmsg, added by commit c71d8ebe7a4496fb7231151cb70a6baa0cb56f9a upstream. commit 5b47b8038f183b44d2d8ff1c7d11a5c1be706b34 in the 3.0-stable tree. Usages are performed through memcmp() and memcpy() directly. Fix those by using the already copied msg_sys structure instead of the __user *msg structure. Note that msg_sys can be set to NULL by verify_compat_iovec() or verify_iovec(), which requires additional NULL pointer checks. Signed-off-by: Mathieu Desnoyers Signed-off-by: David Goulet CC: Tetsuo Handa CC: Anton Blanchard CC: David S. Miller CC: stable Signed-off-by: David S. Miller --- net/socket.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/socket.c b/net/socket.c index 24a7740..ffe92ca 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1965,8 +1965,9 @@ static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, * used_address->name_len is initialized to UINT_MAX so that the first * destination address never matches. */ - if (used_address && used_address->name_len == msg_sys->msg_namelen && - !memcmp(&used_address->name, msg->msg_name, + if (used_address && msg_sys->msg_name && + used_address->name_len == msg_sys->msg_namelen && + !memcmp(&used_address->name, msg_sys->msg_name, used_address->name_len)) { err = sock_sendmsg_nosec(sock, msg_sys, total_len); goto out_freectl; @@ -1978,8 +1979,9 @@ static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, */ if (used_address && err >= 0) { used_address->name_len = msg_sys->msg_namelen; - memcpy(&used_address->name, msg->msg_name, - used_address->name_len); + if (msg_sys->msg_name) + memcpy(&used_address->name, msg_sys->msg_name, + used_address->name_len); } out_freectl: -- cgit v1.1 From 5ef56c8fecedf403a346d02140e52a072d693d6b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 25 Aug 2011 14:42:51 +1000 Subject: md: report failure if a 'set faulty' request doesn't. Sometimes a device will refuse to be set faulty. e.g. RAID1 will never let the last working device become faulty. So check if "md_error()" did manage to set the faulty flag and fail with EBUSY if it didn't. Resolves-Debian-Bug: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=601198 Reported-by: Mike Hommey Signed-off-by: NeilBrown --- drivers/md/md.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 8e221a2..1cd9bfb 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2561,7 +2561,10 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) int err = -EINVAL; if (cmd_match(buf, "faulty") && rdev->mddev->pers) { md_error(rdev->mddev, rdev); - err = 0; + if (test_bit(Faulty, &rdev->flags)) + err = 0; + else + err = -EBUSY; } else if (cmd_match(buf, "remove")) { if (rdev->raid_disk >= 0) err = -EBUSY; @@ -5983,6 +5986,8 @@ static int set_disk_faulty(mddev_t *mddev, dev_t dev) return -ENODEV; md_error(mddev, rdev); + if (!test_bit(Faulty, &rdev->flags)) + return -EBUSY; return 0; } -- cgit v1.1 From aeb9b211849621f592288ed5ad694de9eeaae87a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 25 Aug 2011 14:43:08 +1000 Subject: md: ensure changes to 'write-mostly' are reflected in metadata. The 'write-mostly' flag can be changed through sysfs. With 0.90 metadata, those changes are reflected in the metadata. For 1.x metadata, they aren't. So fix super_1_sync to record 'write-mostly' status. Signed-off-by: NeilBrown --- drivers/md/md.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 1cd9bfb..9a88023 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1738,6 +1738,11 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->level = cpu_to_le32(mddev->level); sb->layout = cpu_to_le32(mddev->layout); + if (test_bit(WriteMostly, &rdev->flags)) + sb->devflags |= WriteMostly1; + else + sb->devflags &= ~WriteMostly1; + if (mddev->bitmap && mddev->bitmap_info.file == NULL) { sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset); sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); -- cgit v1.1 From a5bf4df0c88b88d34b6f0e3bc8a402dac7d14611 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Aug 2011 14:43:34 +1000 Subject: md: use REQ_NOIDLE flag in md_super_write() Queue idling is used for the anticipation of immediate sequencial I/O's but md_super_write() is a kind of one- shot operation, coupled with md_super_wait(), so the idling in this case will be just a waste of time. Specifying REQ_NOIDLE prevents it. Instead of adding the flag to submit_bio() directly, use pre-defined macro WRITE_FLUSH_FUA. Signed-off-by: Namhyung Kim Signed-off-by: NeilBrown --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 9a88023..aca6117 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -848,7 +848,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, bio->bi_end_io = super_written; atomic_inc(&mddev->pending_writes); - submit_bio(REQ_WRITE | REQ_SYNC | REQ_FLUSH | REQ_FUA, bio); + submit_bio(WRITE_FLUSH_FUA, bio); } void md_super_wait(mddev_t *mddev) -- cgit v1.1 From 1b6afa17581027218088a18a9ceda600e0ddba7a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 25 Aug 2011 14:43:53 +1000 Subject: md/linear: avoid corrupting structure while waiting for rcu_free to complete. I don't know what I was thinking putting 'rcu' after a dynamically sized array! The array could still be in use when we call rcu_free() (That is the point) so we mustn't corrupt it. Cc: stable@kernel.org Signed-off-by: NeilBrown --- drivers/md/linear.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/linear.h b/drivers/md/linear.h index 0ce29b6..2f2da05 100644 --- a/drivers/md/linear.h +++ b/drivers/md/linear.h @@ -10,9 +10,9 @@ typedef struct dev_info dev_info_t; struct linear_private_data { + struct rcu_head rcu; sector_t array_sectors; dev_info_t disks[0]; - struct rcu_head rcu; }; -- cgit v1.1 From 35d851df23b093ee027f827fed2213ae5e88fc7a Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 25 Aug 2011 14:21:37 +0200 Subject: HID: magicmouse: ignore 'ivalid report id' while switching modes, v2 This is basically a more generic respin of 23746a6 ("HID: magicmouse: ignore 'ivalid report id' while switching modes") which got reverted later by c3a492. It turns out that on some configurations, this is actually still the case and we are not able to detect in runtime. The device reponds with 'invalid report id' when feature report switching it into multitouch mode is sent to it. This has been silently ignored before 0825411ade ("HID: bt: Wait for ACK on Sent Reports"), but since this commit, it propagates -EIO from the _raw callback . So let the driver ignore -EIO as response to 0xd7,0x01 report, as that's how the device reacts in normal mode. Sad, but following reality. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=35022 Reported-by: Chase Douglas Reported-by: Jaikumar Ganesh Tested-by: Chase Douglas Tested-by: Jaikumar Ganesh Signed-off-by: Jiri Kosina --- drivers/hid/hid-magicmouse.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index b5bdab3..f0fbd7b 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -537,9 +537,17 @@ static int magicmouse_probe(struct hid_device *hdev, } report->size = 6; + /* + * Some devices repond with 'invalid report id' when feature + * report switching it into multitouch mode is sent to it. + * + * This results in -EIO from the _raw low-level transport callback, + * but there seems to be no other way of switching the mode. + * Thus the super-ugly hacky success check below. + */ ret = hdev->hid_output_raw_report(hdev, feature, sizeof(feature), HID_FEATURE_REPORT); - if (ret != sizeof(feature)) { + if (ret != -EIO && ret != sizeof(feature)) { hid_err(hdev, "unable to request touch data (%d)\n", ret); goto err_stop_hw; } -- cgit v1.1 From 468c5458856236cde6df1b0654d32bf6625349a5 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Thu, 25 Aug 2011 13:16:02 +0200 Subject: ALSA: hda: Conexant: Allow different output types to share DAC Headphones has stopped working for the original reported (a regression compared to 2.6.38). This is because Speaker and Headphones share the same DAC, in which case no Headphones volume control was created. This patch fixes so that both Speaker and Headphones volume controls are created in such scenario. BugLink: http://bugs.launchpad.net/bugs/817943 Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 46 +++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 5616444..7696d05 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -3372,18 +3372,26 @@ static int fill_cx_auto_dacs(struct hda_codec *codec, hda_nid_t *dacs) /* fill pin_dac_pair list from the pin and dac list */ static int fill_dacs_for_pins(struct hda_codec *codec, hda_nid_t *pins, int num_pins, hda_nid_t *dacs, int *rest, - struct pin_dac_pair *filled, int type) + struct pin_dac_pair *filled, int nums, + int type) { - int i, nums; + int i, start = nums; - nums = 0; - for (i = 0; i < num_pins; i++) { + for (i = 0; i < num_pins; i++, nums++) { filled[nums].pin = pins[i]; filled[nums].type = type; filled[nums].dac = get_unassigned_dac(codec, pins[i], dacs, rest); - if (!filled[nums].dac && i > 0 && filled[0].dac) + if (filled[nums].dac) + continue; + if (filled[start].dac && get_connection_index(codec, pins[i], filled[start].dac) >= 0) { + filled[nums].dac = filled[start].dac | DAC_SLAVE_FLAG; + continue; + } + if (filled[0].dac && get_connection_index(codec, pins[i], filled[0].dac) >= 0) { filled[nums].dac = filled[0].dac | DAC_SLAVE_FLAG; - nums++; + continue; + } + snd_printdd("Failed to find a DAC for pin 0x%x", pins[i]); } return nums; } @@ -3399,14 +3407,14 @@ static void cx_auto_parse_output(struct hda_codec *codec) rest = fill_cx_auto_dacs(codec, dacs); /* parse all analog output pins */ nums = fill_dacs_for_pins(codec, cfg->line_out_pins, cfg->line_outs, - dacs, &rest, spec->dac_info, - AUTO_PIN_LINE_OUT); - nums += fill_dacs_for_pins(codec, cfg->hp_pins, cfg->hp_outs, - dacs, &rest, spec->dac_info + nums, - AUTO_PIN_HP_OUT); - nums += fill_dacs_for_pins(codec, cfg->speaker_pins, cfg->speaker_outs, - dacs, &rest, spec->dac_info + nums, - AUTO_PIN_SPEAKER_OUT); + dacs, &rest, spec->dac_info, 0, + AUTO_PIN_LINE_OUT); + nums = fill_dacs_for_pins(codec, cfg->hp_pins, cfg->hp_outs, + dacs, &rest, spec->dac_info, nums, + AUTO_PIN_HP_OUT); + nums = fill_dacs_for_pins(codec, cfg->speaker_pins, cfg->speaker_outs, + dacs, &rest, spec->dac_info, nums, + AUTO_PIN_SPEAKER_OUT); spec->dac_info_filled = nums; /* fill multiout struct */ for (i = 0; i < nums; i++) { @@ -4173,9 +4181,11 @@ static int try_add_pb_volume(struct hda_codec *codec, hda_nid_t dac, hda_nid_t pin, const char *name, int idx) { unsigned int caps; - caps = query_amp_caps(codec, dac, HDA_OUTPUT); - if (caps & AC_AMPCAP_NUM_STEPS) - return cx_auto_add_pb_volume(codec, dac, name, idx); + if (dac && !(dac & DAC_SLAVE_FLAG)) { + caps = query_amp_caps(codec, dac, HDA_OUTPUT); + if (caps & AC_AMPCAP_NUM_STEPS) + return cx_auto_add_pb_volume(codec, dac, name, idx); + } caps = query_amp_caps(codec, pin, HDA_OUTPUT); if (caps & AC_AMPCAP_NUM_STEPS) return cx_auto_add_pb_volume(codec, pin, name, idx); @@ -4198,8 +4208,6 @@ static int cx_auto_build_output_controls(struct hda_codec *codec) const char *label; int idx, type; hda_nid_t dac = spec->dac_info[i].dac; - if (!dac || (dac & DAC_SLAVE_FLAG)) - continue; type = spec->dac_info[i].type; if (type == AUTO_PIN_LINE_OUT) type = spec->autocfg.line_out_type; -- cgit v1.1 From 64584eb9cde5f3c5a07f24b2e7cd38f1157be181 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 25 Aug 2011 15:31:05 +0200 Subject: PM / Runtime: Correct documentation of pm_runtime_irq_safe() The description of pm_runtime_irq_safe() has to be updated to follow the code after commit 02b2677 (PM / Runtime: Allow _put_sync() from interrupts-disabled context). Signed-off-by: Rafael J. Wysocki Acked-by: Kevin Hilman --- Documentation/power/runtime_pm.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 4ce5450..6066e3a 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -431,8 +431,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: void pm_runtime_irq_safe(struct device *dev); - set the power.irq_safe flag for the device, causing the runtime-PM - suspend and resume callbacks (but not the idle callback) to be invoked - with interrupts disabled + callbacks to be invoked with interrupts off void pm_runtime_mark_last_busy(struct device *dev); - set the power.last_busy field to the current time -- cgit v1.1 From ff35336d3efd1ec4015b56f690191ed69730cbb0 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Thu, 25 Aug 2011 15:31:14 +0200 Subject: OMAP: omap_device: only override _noirq methods, not normal suspend/resume Commit c03f007a8bf0e092caeb6856a5c8a850df10b974 (OMAP: PM: omap_device: add system PM methods for PM domain handling) mistakenly used SET_SYSTEM_SLEEP_PM_OPS() when trying to configure custom methods for the PM domains noirq methods. Fix that by setting only the suspend_noirq and resume_noirq methods with custom versions. Note that all other PM domain methods (including the "normal" suspend/resume methods) are populated using USE_PLATFORM_PM_SLEEP_OPS, which configures them all to the default subsystem (platform_bus) methods. Reported-by: Santosh Shilimkar Tested-by: Santosh Shilimkar Signed-off-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- arch/arm/plat-omap/omap_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/plat-omap/omap_device.c b/arch/arm/plat-omap/omap_device.c index b6b4097..9a6a538 100644 --- a/arch/arm/plat-omap/omap_device.c +++ b/arch/arm/plat-omap/omap_device.c @@ -622,7 +622,8 @@ static struct dev_pm_domain omap_device_pm_domain = { SET_RUNTIME_PM_OPS(_od_runtime_suspend, _od_runtime_resume, _od_runtime_idle) USE_PLATFORM_PM_SLEEP_OPS - SET_SYSTEM_SLEEP_PM_OPS(_od_suspend_noirq, _od_resume_noirq) + .suspend_noirq = _od_suspend_noirq, + .resume_noirq = _od_resume_noirq, } }; -- cgit v1.1 From 6d1db0777981e1626ae71243984ac300b61789ff Mon Sep 17 00:00:00 2001 From: Clemens Werther Date: Thu, 25 Aug 2011 15:35:14 +0200 Subject: HID: add support for HuiJia USB Gamepad connector Create each gamepad as a separate joystick Signed-off-by: Clemens Werther Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 7d27d2b..7484e1b 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -277,6 +277,7 @@ #define USB_DEVICE_ID_PENPOWER 0x00f4 #define USB_VENDOR_ID_GREENASIA 0x0e8f +#define USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD 0x3013 #define USB_VENDOR_ID_GRETAGMACBETH 0x0971 #define USB_DEVICE_ID_GRETAGMACBETH_HUEY 0x2005 diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 4bdb5d4..3146fdc 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -47,6 +47,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_AFATECH, USB_DEVICE_ID_AFATECH_AF9016, HID_QUIRK_FULLSPEED_INTERVAL }, { USB_VENDOR_ID_ETURBOTOUCH, USB_DEVICE_ID_ETURBOTOUCH, HID_QUIRK_MULTI_INPUT }, + { USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_PANTHERLORD, USB_DEVICE_ID_PANTHERLORD_TWIN_USB_JOYSTICK, HID_QUIRK_MULTI_INPUT | HID_QUIRK_SKIP_OUTPUT_REPORTS }, { USB_VENDOR_ID_PLAYDOTCOM, USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_TOUCHPACK, USB_DEVICE_ID_TOUCHPACK_RTS, HID_QUIRK_MULTI_INPUT }, -- cgit v1.1 From 5166793feb688a884832ca656f161f683be8f04c Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 1 Aug 2011 12:42:14 -0400 Subject: arm: fix compile failure in orion5x/dns323-setup.c Upstream commit d5341942d784134f2997b3ff82cd63cf71d1f932 "PCI: Make the struct pci_dev * argument of pci_fixup_irqs const." leaked an extra "const" into an actual call site (vs a proto/decl) which causes this: arch/arm/mach-orion5x/dns323-setup.c: In function 'dns323_pci_map_irq': arch/arm/mach-orion5x/dns323-setup.c:80: error: expected expression before 'const' arch/arm/mach-orion5x/dns323-setup.c:80: error: too few arguments to function 'orion5x_pci_map_irq' make[3]: *** [arch/arm/mach-orion5x/dns323-setup.o] Error 1 Signed-off-by: Paul Gortmaker Acked-by: Nicolas Pitre Acked-by: Ralf Baechle --- arch/arm/mach-orion5x/dns323-setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-orion5x/dns323-setup.c b/arch/arm/mach-orion5x/dns323-setup.c index a6eddae..c105556 100644 --- a/arch/arm/mach-orion5x/dns323-setup.c +++ b/arch/arm/mach-orion5x/dns323-setup.c @@ -77,7 +77,7 @@ static int __init dns323_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) /* * Check for devices with hard-wired IRQs. */ - irq = orion5x_pci_map_irq(const dev, slot, pin); + irq = orion5x_pci_map_irq(dev, slot, pin); if (irq != -1) return irq; -- cgit v1.1 From 158c0c623ab57e4e1cf705ce64b8efddc1cf82dd Mon Sep 17 00:00:00 2001 From: Bryan Wu Date: Wed, 17 Aug 2011 17:29:38 +0800 Subject: ARM: mach-orion5x: add missing header file This patch fixed following building error: -- arch/arm/mach-orion5x/pci.c: In function 'orion5x_pci_sys_setup': arch/arm/mach-orion5x/pci.c:563:2: error: 'vga_base' undeclared (first use in this function) arch/arm/mach-orion5x/pci.c:563:2: note: each undeclared identifier is reported only once for each function it appears in make[1]: *** [arch/arm/mach-orion5x/pci.o] Error 1 make[1]: *** Waiting for unfinished jobs.... -- Signed-off-by: Bryan Wu Acked-by: Rob Herring --- arch/arm/mach-orion5x/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-orion5x/pci.c b/arch/arm/mach-orion5x/pci.c index 28b8760..bc4a920 100644 --- a/arch/arm/mach-orion5x/pci.c +++ b/arch/arm/mach-orion5x/pci.c @@ -14,6 +14,7 @@ #include #include #include +#include